mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-11-26 21:00:29 +00:00
Remove the PTX back-end and all of its artifacts (triple, etc.)
This back-end was deprecated in favor of the NVPTX back-end. NV_CONTRIB llvm-svn: 157417
This commit is contained in:
parent
03e1ab9c5b
commit
ee3de35dee
@ -83,7 +83,6 @@ set(LLVM_ALL_TARGETS
|
||||
MSP430
|
||||
NVPTX
|
||||
PowerPC
|
||||
PTX
|
||||
Sparc
|
||||
X86
|
||||
XCore
|
||||
|
@ -369,7 +369,6 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
|
||||
msp430-*) llvm_cv_target_arch="MSP430" ;;
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac])
|
||||
@ -403,7 +402,6 @@ case $host in
|
||||
msp430-*) host_arch="MSP430" ;;
|
||||
hexagon-*) host_arch="Hexagon" ;;
|
||||
mblaze-*) host_arch="MBlaze" ;;
|
||||
ptx-*) host_arch="PTX" ;;
|
||||
*) host_arch="Unknown" ;;
|
||||
esac
|
||||
|
||||
@ -542,7 +540,6 @@ else
|
||||
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
esac
|
||||
@ -655,13 +652,13 @@ TARGETS_TO_BUILD=""
|
||||
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
|
||||
[Build specific host targets: all or target1,target2,... Valid targets are:
|
||||
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
|
||||
xcore, msp430, nvptx, and cpp (default=all)]),,
|
||||
enableval=all)
|
||||
if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -677,7 +674,6 @@ case "$enableval" in
|
||||
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
|
||||
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -691,7 +687,6 @@ case "$enableval" in
|
||||
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) AC_MSG_ERROR([Can not set target to build]) ;;
|
||||
esac ;;
|
||||
|
12
configure
vendored
12
configure
vendored
@ -1419,7 +1419,7 @@ Optional Features:
|
||||
--enable-targets Build specific host targets: all or
|
||||
target1,target2,... Valid targets are: host, x86,
|
||||
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, nvptx, and cpp (default=all)
|
||||
xcore, msp430, nvptx, and cpp (default=all)
|
||||
--enable-bindings Build specific language bindings:
|
||||
all,auto,none,{binding-name} (default=auto)
|
||||
--enable-libffi Check for the presence of libffi (default is NO)
|
||||
@ -3901,7 +3901,6 @@ else
|
||||
msp430-*) llvm_cv_target_arch="MSP430" ;;
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac
|
||||
@ -3935,7 +3934,6 @@ case $host in
|
||||
msp430-*) host_arch="MSP430" ;;
|
||||
hexagon-*) host_arch="Hexagon" ;;
|
||||
mblaze-*) host_arch="MBlaze" ;;
|
||||
ptx-*) host_arch="PTX" ;;
|
||||
*) host_arch="Unknown" ;;
|
||||
esac
|
||||
|
||||
@ -5147,8 +5145,6 @@ else
|
||||
Hexagon) TARGET_HAS_JIT=0
|
||||
;;
|
||||
MBlaze) TARGET_HAS_JIT=0
|
||||
;;
|
||||
PTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
NVPTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
@ -5336,7 +5332,7 @@ if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5352,7 +5348,6 @@ case "$enableval" in
|
||||
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
|
||||
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5366,7 +5361,6 @@ case "$enableval" in
|
||||
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
|
||||
echo "$as_me: error: Can not set target to build" >&2;}
|
||||
@ -10349,7 +10343,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<EOF
|
||||
#line 10352 "configure"
|
||||
#line 10346 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
|
@ -62,8 +62,6 @@ public:
|
||||
x86_64, // X86-64: amd64, x86_64
|
||||
xcore, // XCore: xcore
|
||||
mblaze, // MBlaze: mblaze
|
||||
ptx32, // PTX: ptx (32-bit)
|
||||
ptx64, // PTX: ptx (64-bit)
|
||||
nvptx, // NVPTX: 32-bit
|
||||
nvptx64, // NVPTX: 64-bit
|
||||
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)
|
||||
|
@ -445,6 +445,5 @@ include "llvm/IntrinsicsX86.td"
|
||||
include "llvm/IntrinsicsARM.td"
|
||||
include "llvm/IntrinsicsCellSPU.td"
|
||||
include "llvm/IntrinsicsXCore.td"
|
||||
include "llvm/IntrinsicsPTX.td"
|
||||
include "llvm/IntrinsicsHexagon.td"
|
||||
include "llvm/IntrinsicsNVVM.td"
|
||||
|
@ -870,3 +870,83 @@ def int_nvvm_compiler_error :
|
||||
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
|
||||
def int_nvvm_compiler_warn :
|
||||
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
|
||||
|
||||
|
||||
// Old PTX back-end intrinsics retained here for backwards-compatibility
|
||||
|
||||
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
|
||||
// FIXME: Do we need the 128-bit integer type version?
|
||||
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
|
||||
|
||||
// FIXME: Enable this once v4i32 support is enabled in back-end.
|
||||
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
|
||||
|
||||
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_x")>;
|
||||
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_y")>;
|
||||
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_z")>;
|
||||
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_w")>;
|
||||
}
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r32<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r64<string name>
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
|
||||
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_tid">;
|
||||
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_ntid">;
|
||||
|
||||
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_laneid">;
|
||||
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_warpid">;
|
||||
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_nwarpid">;
|
||||
|
||||
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_ctaid">;
|
||||
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_nctaid">;
|
||||
|
||||
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_smid">;
|
||||
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_nsmid">;
|
||||
def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_gridid">;
|
||||
|
||||
def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_eq">;
|
||||
def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_le">;
|
||||
def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_lt">;
|
||||
def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_ge">;
|
||||
def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_gt">;
|
||||
|
||||
def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_clock">;
|
||||
def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
|
||||
<"__builtin_ptx_read_clock64">;
|
||||
|
||||
def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm0">;
|
||||
def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm1">;
|
||||
def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm2">;
|
||||
def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm3">;
|
||||
|
||||
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
|
||||
GCCBuiltin<"__builtin_ptx_bar_sync">;
|
||||
|
@ -1,92 +0,0 @@
|
||||
//===- IntrinsicsPTX.td - Defines PTX intrinsics -----------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines all of the PTX-specific intrinsics.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
let TargetPrefix = "ptx" in {
|
||||
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
|
||||
// FIXME: Do we need the 128-bit integer type version?
|
||||
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
|
||||
|
||||
// FIXME: Enable this once v4i32 support is enabled in back-end.
|
||||
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
|
||||
|
||||
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_x")>;
|
||||
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_y")>;
|
||||
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_z")>;
|
||||
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<!strconcat(prefix, "_w")>;
|
||||
}
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r32<string name>
|
||||
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
|
||||
class PTXReadSpecialRegisterIntrinsic_r64<string name>
|
||||
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
|
||||
GCCBuiltin<name>;
|
||||
}
|
||||
|
||||
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_tid">;
|
||||
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_ntid">;
|
||||
|
||||
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_laneid">;
|
||||
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_warpid">;
|
||||
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_nwarpid">;
|
||||
|
||||
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_ctaid">;
|
||||
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
|
||||
<"__builtin_ptx_read_nctaid">;
|
||||
|
||||
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_smid">;
|
||||
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_nsmid">;
|
||||
def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_gridid">;
|
||||
|
||||
def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_eq">;
|
||||
def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_le">;
|
||||
def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_lt">;
|
||||
def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_ge">;
|
||||
def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_lanemask_gt">;
|
||||
|
||||
def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_clock">;
|
||||
def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
|
||||
<"__builtin_ptx_read_clock64">;
|
||||
|
||||
def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm0">;
|
||||
def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm1">;
|
||||
def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm2">;
|
||||
def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
|
||||
<"__builtin_ptx_read_pm3">;
|
||||
|
||||
let TargetPrefix = "ptx" in
|
||||
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
|
||||
GCCBuiltin<"__builtin_ptx_bar_sync">;
|
@ -38,8 +38,6 @@ const char *Triple::getArchTypeName(ArchType Kind) {
|
||||
case x86_64: return "x86_64";
|
||||
case xcore: return "xcore";
|
||||
case mblaze: return "mblaze";
|
||||
case ptx32: return "ptx32";
|
||||
case ptx64: return "ptx64";
|
||||
case nvptx: return "nvptx";
|
||||
case nvptx64: return "nvptx64";
|
||||
case le32: return "le32";
|
||||
@ -76,8 +74,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
|
||||
|
||||
case xcore: return "xcore";
|
||||
|
||||
case ptx32: return "ptx";
|
||||
case ptx64: return "ptx";
|
||||
case nvptx: return "nvptx";
|
||||
case nvptx64: return "nvptx";
|
||||
case le32: return "le32";
|
||||
@ -164,8 +160,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
|
||||
.Case("x86", x86)
|
||||
.Case("x86-64", x86_64)
|
||||
.Case("xcore", xcore)
|
||||
.Case("ptx32", ptx32)
|
||||
.Case("ptx64", ptx64)
|
||||
.Case("nvptx", nvptx)
|
||||
.Case("nvptx64", nvptx64)
|
||||
.Case("le32", le32)
|
||||
@ -198,8 +192,6 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
|
||||
.Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
|
||||
.Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
|
||||
.Case("r600", Triple::r600)
|
||||
.Case("ptx32", Triple::ptx32)
|
||||
.Case("ptx64", Triple::ptx64)
|
||||
.Case("nvptx", Triple::nvptx)
|
||||
.Case("nvptx64", Triple::nvptx64)
|
||||
.Case("amdil", Triple::amdil)
|
||||
@ -223,8 +215,6 @@ const char *Triple::getArchNameForAssembler() {
|
||||
.Cases("armv6", "thumbv6", "armv6")
|
||||
.Cases("armv7", "thumbv7", "armv7")
|
||||
.Case("r600", "r600")
|
||||
.Case("ptx32", "ptx32")
|
||||
.Case("ptx64", "ptx64")
|
||||
.Case("nvptx", "nvptx")
|
||||
.Case("nvptx64", "nvptx64")
|
||||
.Case("le32", "le32")
|
||||
@ -259,8 +249,6 @@ static Triple::ArchType parseArch(StringRef ArchName) {
|
||||
.Case("sparcv9", Triple::sparcv9)
|
||||
.Case("tce", Triple::tce)
|
||||
.Case("xcore", Triple::xcore)
|
||||
.Case("ptx32", Triple::ptx32)
|
||||
.Case("ptx64", Triple::ptx64)
|
||||
.Case("nvptx", Triple::nvptx)
|
||||
.Case("nvptx64", Triple::nvptx64)
|
||||
.Case("le32", Triple::le32)
|
||||
@ -689,7 +677,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
|
||||
case llvm::Triple::mipsel:
|
||||
case llvm::Triple::nvptx:
|
||||
case llvm::Triple::ppc:
|
||||
case llvm::Triple::ptx32:
|
||||
case llvm::Triple::r600:
|
||||
case llvm::Triple::sparc:
|
||||
case llvm::Triple::tce:
|
||||
@ -702,7 +689,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
|
||||
case llvm::Triple::mips64el:
|
||||
case llvm::Triple::nvptx64:
|
||||
case llvm::Triple::ppc64:
|
||||
case llvm::Triple::ptx64:
|
||||
case llvm::Triple::sparcv9:
|
||||
case llvm::Triple::x86_64:
|
||||
return 64;
|
||||
@ -740,7 +726,6 @@ Triple Triple::get32BitArchVariant() const {
|
||||
case Triple::mipsel:
|
||||
case Triple::nvptx:
|
||||
case Triple::ppc:
|
||||
case Triple::ptx32:
|
||||
case Triple::r600:
|
||||
case Triple::sparc:
|
||||
case Triple::tce:
|
||||
@ -754,7 +739,6 @@ Triple Triple::get32BitArchVariant() const {
|
||||
case Triple::mips64el: T.setArch(Triple::mipsel); break;
|
||||
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
|
||||
case Triple::ppc64: T.setArch(Triple::ppc); break;
|
||||
case Triple::ptx64: T.setArch(Triple::ptx32); break;
|
||||
case Triple::sparcv9: T.setArch(Triple::sparc); break;
|
||||
case Triple::x86_64: T.setArch(Triple::x86); break;
|
||||
}
|
||||
@ -783,7 +767,6 @@ Triple Triple::get64BitArchVariant() const {
|
||||
case Triple::mips64el:
|
||||
case Triple::nvptx64:
|
||||
case Triple::ppc64:
|
||||
case Triple::ptx64:
|
||||
case Triple::sparcv9:
|
||||
case Triple::x86_64:
|
||||
// Already 64-bit.
|
||||
@ -793,7 +776,6 @@ Triple Triple::get64BitArchVariant() const {
|
||||
case Triple::mipsel: T.setArch(Triple::mips64el); break;
|
||||
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
|
||||
case Triple::ppc: T.setArch(Triple::ppc64); break;
|
||||
case Triple::ptx32: T.setArch(Triple::ptx64); break;
|
||||
case Triple::sparc: T.setArch(Triple::sparcv9); break;
|
||||
case Triple::x86: T.setArch(Triple::x86_64); break;
|
||||
}
|
||||
|
@ -16,7 +16,7 @@
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[common]
|
||||
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore
|
||||
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
|
||||
|
||||
; This is a special group whose required libraries are extended (by llvm-build)
|
||||
; with the best execution engine (the native JIT, if available, or the
|
||||
|
@ -1,32 +0,0 @@
|
||||
set(LLVM_TARGET_DEFINITIONS PTX.td)
|
||||
|
||||
tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer)
|
||||
tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel)
|
||||
tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info)
|
||||
tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info)
|
||||
tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget)
|
||||
add_public_tablegen_target(PTXCommonTableGen)
|
||||
|
||||
add_llvm_target(PTXCodeGen
|
||||
PTXAsmPrinter.cpp
|
||||
PTXISelDAGToDAG.cpp
|
||||
PTXISelLowering.cpp
|
||||
PTXInstrInfo.cpp
|
||||
PTXFPRoundingModePass.cpp
|
||||
PTXFrameLowering.cpp
|
||||
PTXMCAsmStreamer.cpp
|
||||
PTXMCInstLower.cpp
|
||||
PTXMFInfoExtract.cpp
|
||||
PTXMachineFunctionInfo.cpp
|
||||
PTXParamManager.cpp
|
||||
PTXRegAlloc.cpp
|
||||
PTXRegisterInfo.cpp
|
||||
PTXSelectionDAGInfo.cpp
|
||||
PTXSubtarget.cpp
|
||||
PTXTargetMachine.cpp
|
||||
)
|
||||
|
||||
add_subdirectory(TargetInfo)
|
||||
add_subdirectory(InstPrinter)
|
||||
add_subdirectory(MCTargetDesc)
|
||||
|
@ -1,8 +0,0 @@
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
|
||||
|
||||
add_llvm_library(LLVMPTXAsmPrinter
|
||||
PTXInstPrinter.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)
|
||||
|
@ -1,23 +0,0 @@
|
||||
;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = PTXAsmPrinter
|
||||
parent = PTX
|
||||
required_libraries = MC Support
|
||||
add_to_library_groups = PTX
|
@ -1,16 +0,0 @@
|
||||
##===- lib/Target/PTX/AsmPrinter/Makefile ------------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMPTXAsmPrinter
|
||||
|
||||
# Hack: we need to include 'main' ptx target directory to grab private headers
|
||||
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
||||
|
@ -1,249 +0,0 @@
|
||||
//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This class prints a PTX MCInst to a .ptx file.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "asm-printer"
|
||||
#include "PTXInstPrinter.h"
|
||||
#include "MCTargetDesc/PTXBaseInfo.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/ADT/APFloat.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
using namespace llvm;
|
||||
|
||||
#include "PTXGenAsmWriter.inc"
|
||||
|
||||
PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
|
||||
const MCInstrInfo &MII,
|
||||
const MCRegisterInfo &MRI,
|
||||
const MCSubtargetInfo &STI) :
|
||||
MCInstPrinter(MAI, MII, MRI) {
|
||||
// Initialize the set of available features.
|
||||
setAvailableFeatures(STI.getFeatureBits());
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
|
||||
// Decode the register number into type and offset
|
||||
unsigned RegSpace = RegNo & 0x7;
|
||||
unsigned RegType = (RegNo >> 3) & 0x7;
|
||||
unsigned RegOffset = RegNo >> 6;
|
||||
|
||||
// Print the register
|
||||
OS << "%";
|
||||
|
||||
switch (RegSpace) {
|
||||
default:
|
||||
llvm_unreachable("Unknown register space!");
|
||||
case PTXRegisterSpace::Reg:
|
||||
switch (RegType) {
|
||||
default:
|
||||
llvm_unreachable("Unknown register type!");
|
||||
case PTXRegisterType::Pred:
|
||||
OS << "p";
|
||||
break;
|
||||
case PTXRegisterType::B16:
|
||||
OS << "rh";
|
||||
break;
|
||||
case PTXRegisterType::B32:
|
||||
OS << "r";
|
||||
break;
|
||||
case PTXRegisterType::B64:
|
||||
OS << "rd";
|
||||
break;
|
||||
case PTXRegisterType::F32:
|
||||
OS << "f";
|
||||
break;
|
||||
case PTXRegisterType::F64:
|
||||
OS << "fd";
|
||||
break;
|
||||
}
|
||||
break;
|
||||
case PTXRegisterSpace::Return:
|
||||
OS << "ret";
|
||||
break;
|
||||
case PTXRegisterSpace::Argument:
|
||||
OS << "arg";
|
||||
break;
|
||||
}
|
||||
|
||||
OS << RegOffset;
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
|
||||
StringRef Annot) {
|
||||
printPredicate(MI, O);
|
||||
switch (MI->getOpcode()) {
|
||||
default:
|
||||
printInstruction(MI, O);
|
||||
break;
|
||||
case PTX::CALL:
|
||||
printCall(MI, O);
|
||||
}
|
||||
O << ";";
|
||||
printAnnotation(O, Annot);
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
|
||||
// The last two operands are the predicate operands
|
||||
int RegIndex;
|
||||
int OpIndex;
|
||||
|
||||
if (MI->getOpcode() == PTX::CALL) {
|
||||
RegIndex = 0;
|
||||
OpIndex = 1;
|
||||
} else {
|
||||
RegIndex = MI->getNumOperands()-2;
|
||||
OpIndex = MI->getNumOperands()-1;
|
||||
}
|
||||
|
||||
int PredOp = MI->getOperand(OpIndex).getImm();
|
||||
if (PredOp == PTXPredicate::None)
|
||||
return;
|
||||
|
||||
if (PredOp == PTXPredicate::Negate)
|
||||
O << '!';
|
||||
else
|
||||
O << '@';
|
||||
|
||||
printOperand(MI, RegIndex, O);
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
|
||||
O << "\tcall.uni\t";
|
||||
// The first two operands are the predicate slot
|
||||
unsigned Index = 2;
|
||||
unsigned NumRets = MI->getOperand(Index++).getImm();
|
||||
|
||||
if (NumRets > 0) {
|
||||
O << "(";
|
||||
printOperand(MI, Index++, O);
|
||||
for (unsigned i = 1; i < NumRets; ++i) {
|
||||
O << ", ";
|
||||
printOperand(MI, Index++, O);
|
||||
}
|
||||
O << "), ";
|
||||
}
|
||||
|
||||
const MCExpr* Expr = MI->getOperand(Index++).getExpr();
|
||||
unsigned NumArgs = MI->getOperand(Index++).getImm();
|
||||
|
||||
// if the function call is to printf or puts, change to vprintf
|
||||
if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
|
||||
const MCSymbol &Sym = SymRefExpr->getSymbol();
|
||||
if (Sym.getName() == "printf" || Sym.getName() == "puts") {
|
||||
O << "vprintf";
|
||||
} else {
|
||||
O << Sym.getName();
|
||||
}
|
||||
} else {
|
||||
O << *Expr;
|
||||
}
|
||||
|
||||
O << ", (";
|
||||
|
||||
if (NumArgs > 0) {
|
||||
printOperand(MI, Index++, O);
|
||||
for (unsigned i = 1; i < NumArgs; ++i) {
|
||||
O << ", ";
|
||||
printOperand(MI, Index++, O);
|
||||
}
|
||||
}
|
||||
O << ")";
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &Op = MI->getOperand(OpNo);
|
||||
if (Op.isImm()) {
|
||||
O << Op.getImm();
|
||||
} else if (Op.isFPImm()) {
|
||||
double Imm = Op.getFPImm();
|
||||
APFloat FPImm(Imm);
|
||||
APInt FPIntImm = FPImm.bitcastToAPInt();
|
||||
O << "0D";
|
||||
// PTX requires us to output the full 64 bits, even if the number is zero
|
||||
if (FPIntImm.getZExtValue() > 0) {
|
||||
O << FPIntImm.toString(16, false);
|
||||
} else {
|
||||
O << "0000000000000000";
|
||||
}
|
||||
} else if (Op.isReg()) {
|
||||
printRegName(O, Op.getReg());
|
||||
} else {
|
||||
assert(Op.isExpr() && "unknown operand kind in printOperand");
|
||||
const MCExpr *Expr = Op.getExpr();
|
||||
if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
|
||||
const MCSymbol &Sym = SymRefExpr->getSymbol();
|
||||
O << Sym.getName();
|
||||
} else {
|
||||
O << *Op.getExpr();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
// By definition, operand OpNo+1 is an i32imm
|
||||
const MCOperand &Op2 = MI->getOperand(OpNo+1);
|
||||
printOperand(MI, OpNo, O);
|
||||
if (Op2.getImm() == 0)
|
||||
return; // don't print "+0"
|
||||
O << "+" << Op2.getImm();
|
||||
}
|
||||
|
||||
void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
|
||||
raw_ostream &O) {
|
||||
const MCOperand &Op = MI->getOperand(OpNo);
|
||||
assert (Op.isImm() && "Rounding modes must be immediate values");
|
||||
switch (Op.getImm()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown rounding mode!");
|
||||
case PTXRoundingMode::RndDefault:
|
||||
llvm_unreachable("FP rounding-mode pass did not handle instruction!");
|
||||
case PTXRoundingMode::RndNone:
|
||||
// Do not print anything.
|
||||
break;
|
||||
case PTXRoundingMode::RndNearestEven:
|
||||
O << ".rn";
|
||||
break;
|
||||
case PTXRoundingMode::RndTowardsZero:
|
||||
O << ".rz";
|
||||
break;
|
||||
case PTXRoundingMode::RndNegInf:
|
||||
O << ".rm";
|
||||
break;
|
||||
case PTXRoundingMode::RndPosInf:
|
||||
O << ".rp";
|
||||
break;
|
||||
case PTXRoundingMode::RndApprox:
|
||||
O << ".approx";
|
||||
break;
|
||||
case PTXRoundingMode::RndNearestEvenInt:
|
||||
O << ".rni";
|
||||
break;
|
||||
case PTXRoundingMode::RndTowardsZeroInt:
|
||||
O << ".rzi";
|
||||
break;
|
||||
case PTXRoundingMode::RndNegInfInt:
|
||||
O << ".rmi";
|
||||
break;
|
||||
case PTXRoundingMode::RndPosInfInt:
|
||||
O << ".rpi";
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -1,45 +0,0 @@
|
||||
//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This class prints n PTX MCInst to a .ptx file.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTXINSTPRINTER_H
|
||||
#define PTXINSTPRINTER_H
|
||||
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCOperand;
|
||||
|
||||
class PTXInstPrinter : public MCInstPrinter {
|
||||
public:
|
||||
PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
|
||||
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
|
||||
|
||||
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
|
||||
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
|
||||
|
||||
// Autogenerated by tblgen.
|
||||
void printInstruction(const MCInst *MI, raw_ostream &O);
|
||||
static const char *getRegisterName(unsigned RegNo);
|
||||
|
||||
void printPredicate(const MCInst *MI, raw_ostream &O);
|
||||
void printCall(const MCInst *MI, raw_ostream &O);
|
||||
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,32 +0,0 @@
|
||||
;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[common]
|
||||
subdirectories = InstPrinter MCTargetDesc TargetInfo
|
||||
|
||||
[component_0]
|
||||
type = TargetGroup
|
||||
name = PTX
|
||||
parent = Target
|
||||
has_asmprinter = 1
|
||||
|
||||
[component_1]
|
||||
type = Library
|
||||
name = PTXCodeGen
|
||||
parent = PTX
|
||||
required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
|
||||
add_to_library_groups = PTX
|
@ -1,6 +0,0 @@
|
||||
# MC-layer target description library for the PTX back-end.
add_llvm_library(LLVMPTXDesc
  PTXMCTargetDesc.cpp
  PTXMCAsmInfo.cpp
  )

# PTXMCTargetDesc.cpp includes the PTXGen*.inc files, so the library must be
# built after PTXCommonTableGen (presumably the target that generates them).
add_dependencies(LLVMPTXDesc PTXCommonTableGen)
|
@ -1,23 +0,0 @@
|
||||
;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = PTXDesc
|
||||
parent = PTX
|
||||
required_libraries = MC PTXAsmPrinter PTXInfo Support
|
||||
add_to_library_groups = PTX
|
@ -1,16 +0,0 @@
|
||||
##===- lib/Target/PTX/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMPTXDesc
|
||||
|
||||
# Hack: we need to include 'main' target directory to grab private headers
|
||||
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
@ -1,134 +0,0 @@
|
||||
//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains small standalone helper functions and enum definitions for
|
||||
// the PTX target useful for the compiler back-end and the MC libraries.
|
||||
// As such, it deliberately does not include references to LLVM core
|
||||
// code gen types, passes, etc..
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTXBASEINFO_H
|
||||
#define PTXBASEINFO_H
|
||||
|
||||
#include "PTXMCTargetDesc.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace llvm {
|
||||
/// Numeric identifiers for the PTX state spaces.
namespace PTXStateSpace {
  enum {
    Global    = 0, // default to global state space
    Constant  = 1,
    Local     = 2,
    Parameter = 3,
    Shared    = 4
  };
} // namespace PTXStateSpace
|
||||
|
||||
/// Predicate kinds.
namespace PTXPredicate {
  enum {
    Normal = 0,
    Negate = 1,
    None   = 2
  };
} // namespace PTXPredicate
|
||||
|
||||
/// Rounding-mode values; the trailing comment on each enumerator gives the
/// PTX suffix it corresponds to.
namespace PTXRoundingMode {
  enum {
    // Mask covering the rounding-mode values below.
    RndMask           = 15,

    RndDefault        = 0,  // ---
    RndNone           = 1,  // <NONE>
    RndNearestEven    = 2,  // .rn
    RndTowardsZero    = 3,  // .rz
    RndNegInf         = 4,  // .rm
    RndPosInf         = 5,  // .rp
    RndApprox         = 6,  // .approx
    RndNearestEvenInt = 7,  // .rni
    RndTowardsZeroInt = 8,  // .rzi
    RndNegInfInt      = 9,  // .rmi
    RndPosInfInt      = 10  // .rpi
  };
} // namespace PTXRoundingMode
|
||||
|
||||
namespace PTXRegisterType {
  // Register type encoded in MCOperands
  enum {
    Pred = 0,
    B16  = 1,
    B32  = 2,
    B64  = 3,
    F32  = 4,
    F64  = 5
  };
} // namespace PTXRegisterType
|
||||
|
||||
namespace PTXRegisterSpace {
  // Register space encoded in MCOperands
  enum {
    Reg      = 0,
    Local    = 1,
    Param    = 2,
    Argument = 3,
    Return   = 4
  };
} // namespace PTXRegisterSpace
|
||||
|
||||
/// Writes the textual name of an encoded register to \p OS.
///
/// \p EncodedReg is unpacked as: bits [2:0] register space, bits [5:3]
/// register type, and the remaining high bits the register index.  The output
/// is '%', then a prefix selected by space (and, for PTXRegisterSpace::Reg, by
/// type), then the index in decimal.  Only the Reg, Return and Argument spaces
/// are handled; anything else reaches llvm_unreachable.
inline static void decodeRegisterName(raw_ostream &OS,
                                      unsigned EncodedReg) {
  const unsigned Space = EncodedReg & 0x7;
  const unsigned Type  = (EncodedReg >> 3) & 0x7;
  const unsigned Index = EncodedReg >> 6;

  OS << "%";

  const char *Prefix = 0;
  if (Space == PTXRegisterSpace::Reg) {
    switch (Type) {
    default:
      llvm_unreachable("Unknown register type!");
    case PTXRegisterType::Pred: Prefix = "p";  break;
    case PTXRegisterType::B16:  Prefix = "rh"; break;
    case PTXRegisterType::B32:  Prefix = "r";  break;
    case PTXRegisterType::B64:  Prefix = "rd"; break;
    case PTXRegisterType::F32:  Prefix = "f";  break;
    case PTXRegisterType::F64:  Prefix = "fd"; break;
    }
  } else if (Space == PTXRegisterSpace::Return) {
    Prefix = "ret";
  } else if (Space == PTXRegisterSpace::Argument) {
    Prefix = "arg";
  } else {
    llvm_unreachable("Unknown register space!");
  }

  OS << Prefix << Index;
}
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
|
@ -1,37 +0,0 @@
|
||||
//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declarations of the PTXMCAsmInfo properties.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXMCAsmInfo.h"
|
||||
#include "llvm/ADT/Triple.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Out-of-line definition of the virtual anchor() declared in PTXMCAsmInfo.h;
// intentionally empty.
void PTXMCAsmInfo::anchor() {}
|
||||
|
||||
/// Sets the assembly-syntax properties used for PTX output.  \p TT is the
/// target triple string; \p T is not used.
PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
  // Only the ptx64 architecture overrides the pointer size here.
  if (Triple(TT).getArch() == Triple::ptx64)
    PointerSize = 8;

  // Lexical conventions.
  CommentString       = "//";
  PrivateGlobalPrefix = "$L__";
  AllowPeriodsInName  = false;

  // Directive-related flags, all switched off.
  HasSetDirective            = false;
  HasDotTypeDotSizeDirective = false;
  HasSingleParameterDotFile  = false;
}
|
@ -1,30 +0,0 @@
|
||||
//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the declaration of the PTXMCAsmInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_MCASM_INFO_H
|
||||
#define PTX_MCASM_INFO_H
|
||||
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
class Target;
|
||||
class StringRef;
|
||||
|
||||
class PTXMCAsmInfo : public MCAsmInfo {
|
||||
virtual void anchor();
|
||||
public:
|
||||
explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
|
||||
};
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_MCASM_INFO_H
|
@ -1,98 +0,0 @@
|
||||
//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides PTX specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXMCTargetDesc.h"
|
||||
#include "PTXMCAsmInfo.h"
|
||||
#include "InstPrinter/PTXInstPrinter.h"
|
||||
#include "llvm/MC/MCCodeGenInfo.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCRegisterInfo.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define GET_INSTRINFO_MC_DESC
|
||||
#include "PTXGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_MC_DESC
|
||||
#include "PTXGenSubtargetInfo.inc"
|
||||
|
||||
#define GET_REGINFO_MC_DESC
|
||||
#include "PTXGenRegisterInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// Heap-allocates an MCInstrInfo, initializes it with InitPTXMCInstrInfo()
/// and returns it.
static MCInstrInfo *createPTXMCInstrInfo() {
  MCInstrInfo *InstrInfo = new MCInstrInfo();
  InitPTXMCInstrInfo(InstrInfo);
  return InstrInfo;
}
|
||||
|
||||
/// Heap-allocates an MCRegisterInfo, initializes it with
/// InitPTXMCRegisterInfo() and returns it.  \p TT is not used.
static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *RegInfo = new MCRegisterInfo();
  // PTX does not have a return address register, so 0 is passed for it.
  InitPTXMCRegisterInfo(RegInfo, 0);
  return RegInfo;
}
|
||||
|
||||
/// Heap-allocates an MCSubtargetInfo, initializes it from the triple \p TT,
/// CPU name \p CPU and feature string \p FS via InitPTXMCSubtargetInfo(), and
/// returns it.
static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
                                                 StringRef FS) {
  MCSubtargetInfo *SubtargetInfo = new MCSubtargetInfo();
  InitPTXMCSubtargetInfo(SubtargetInfo, TT, CPU, FS);
  return SubtargetInfo;
}
|
||||
|
||||
/// Heap-allocates an MCCodeGenInfo initialized with the given relocation
/// model, code model and optimization level.  \p TT is not used.
static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                             CodeModel::Model CM,
                                             CodeGenOpt::Level OL) {
  MCCodeGenInfo *CodeGenInfo = new MCCodeGenInfo();
  CodeGenInfo->InitMCCodeGenInfo(RM, CM, OL);
  return CodeGenInfo;
}
|
||||
|
||||
/// Returns a newly allocated PTXInstPrinter.  \p SyntaxVariant must be 0
/// (checked by assertion); \p T is not used.
static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
                                             unsigned SyntaxVariant,
                                             const MCAsmInfo &MAI,
                                             const MCInstrInfo &MII,
                                             const MCRegisterInfo &MRI,
                                             const MCSubtargetInfo &STI) {
  assert(SyntaxVariant == 0 && "We only have one syntax variant");
  MCInstPrinter *Printer = new PTXInstPrinter(MAI, MII, MRI, STI);
  return Printer;
}
|
||||
|
||||
extern "C" void LLVMInitializePTXTargetMC() {
|
||||
// Register the MC asm info.
|
||||
RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
|
||||
RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
|
||||
|
||||
// Register the MC codegen info.
|
||||
TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
|
||||
TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
|
||||
|
||||
// Register the MC instruction info.
|
||||
TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
|
||||
TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
|
||||
|
||||
// Register the MC register info.
|
||||
TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
|
||||
TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
|
||||
|
||||
// Register the MC subtarget info.
|
||||
TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
|
||||
createPTXMCSubtargetInfo);
|
||||
TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
|
||||
createPTXMCSubtargetInfo);
|
||||
|
||||
// Register the MCInstPrinter.
|
||||
TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
|
||||
TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
|
||||
}
|
@ -1,36 +0,0 @@
|
||||
//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file provides PTX specific target descriptions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTXMCTARGETDESC_H
|
||||
#define PTXMCTARGETDESC_H
|
||||
|
||||
namespace llvm {

class Target;

// Target objects for the two PTX variants; declared here, defined elsewhere.
extern Target ThePTX32Target;
extern Target ThePTX64Target;

} // End llvm namespace
|
||||
|
||||
// Defines symbolic names for PTX registers.
|
||||
#define GET_REGINFO_ENUM
|
||||
#include "PTXGenRegisterInfo.inc"
|
||||
|
||||
// Defines symbolic names for the PTX instructions.
|
||||
#define GET_INSTRINFO_ENUM
|
||||
#include "PTXGenInstrInfo.inc"
|
||||
|
||||
#define GET_SUBTARGETINFO_ENUM
|
||||
#include "PTXGenSubtargetInfo.inc"
|
||||
|
||||
#endif
|
@ -1,23 +0,0 @@
|
||||
##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
|
||||
LEVEL = ../../..
|
||||
LIBRARYNAME = LLVMPTXCodeGen
|
||||
TARGET = PTX
|
||||
|
||||
# Make sure that tblgen is run, first thing.
|
||||
BUILT_SOURCES = PTXGenAsmWriter.inc \
|
||||
PTXGenDAGISel.inc \
|
||||
PTXGenInstrInfo.inc \
|
||||
PTXGenRegisterInfo.inc \
|
||||
PTXGenSubtargetInfo.inc
|
||||
|
||||
DIRS = InstPrinter TargetInfo MCTargetDesc
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
@ -1,43 +0,0 @@
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the entry points for global functions defined in the LLVM
|
||||
// PTX back-end.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_H
|
||||
#define PTX_H
|
||||
|
||||
#include "MCTargetDesc/PTXBaseInfo.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
class MachineInstr;
|
||||
class MCInst;
|
||||
class PTXAsmPrinter;
|
||||
class PTXTargetMachine;
|
||||
class FunctionPass;
|
||||
|
||||
FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
|
||||
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
|
||||
FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel);
|
||||
|
||||
FunctionPass *createPTXRegisterAllocator();
|
||||
|
||||
void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
|
||||
PTXAsmPrinter &AP);
|
||||
|
||||
} // namespace llvm;
|
||||
|
||||
#endif // PTX_H
|
@ -1,141 +0,0 @@
|
||||
//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
// This is the top level entry point for the PTX target.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target-independent interfaces
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "llvm/Target/Target.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Subtarget Features
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===- Architectural Features ---------------------------------------------===//
|
||||
|
||||
def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
|
||||
"Do not demote .f64 to .f32">;
|
||||
|
||||
def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
|
||||
"Disable Fused-Multiply Add">;
|
||||
|
||||
//===- PTX Version --------------------------------------------------------===//
|
||||
|
||||
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
||||
"Use PTX Language Version 2.0">;
|
||||
|
||||
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
|
||||
"Use PTX Language Version 2.1">;
|
||||
|
||||
def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
|
||||
"Use PTX Language Version 2.2">;
|
||||
|
||||
def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
|
||||
"Use PTX Language Version 2.3">;
|
||||
|
||||
//===- PTX Target ---------------------------------------------------------===//
|
||||
|
||||
def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
|
||||
"Use Shader Model 1.0">;
|
||||
def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
|
||||
"Use Shader Model 1.1">;
|
||||
def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
|
||||
"Use Shader Model 1.2">;
|
||||
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
|
||||
"Use Shader Model 1.3">;
|
||||
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
|
||||
"Use Shader Model 2.0", [FeatureDouble]>;
|
||||
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
|
||||
"Use Shader Model 2.1", [FeatureDouble]>;
|
||||
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
|
||||
"Use Shader Model 2.2", [FeatureDouble]>;
|
||||
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
|
||||
"Use Shader Model 2.3", [FeatureDouble]>;
|
||||
|
||||
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
|
||||
"PTX_COMPUTE_1_0",
|
||||
"Use Compute Compatibility 1.0">;
|
||||
def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
|
||||
"PTX_COMPUTE_1_1",
|
||||
"Use Compute Compatibility 1.1">;
|
||||
def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
|
||||
"PTX_COMPUTE_1_2",
|
||||
"Use Compute Compatibility 1.2">;
|
||||
def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
|
||||
"PTX_COMPUTE_1_3",
|
||||
"Use Compute Compatibility 1.3">;
|
||||
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
|
||||
"PTX_COMPUTE_2_0",
|
||||
"Use Compute Compatibility 2.0",
|
||||
[FeatureDouble]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PTX supported processors
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
class Proc<string Name, list<SubtargetFeature> Features>
|
||||
: Processor<Name, NoItineraries, Features>;
|
||||
|
||||
def : Proc<"generic", []>;
|
||||
|
||||
// Processor definitions for compute/shader models
|
||||
def : Proc<"compute_10", [FeatureCOMPUTE10]>;
|
||||
def : Proc<"compute_11", [FeatureCOMPUTE11]>;
|
||||
def : Proc<"compute_12", [FeatureCOMPUTE12]>;
|
||||
def : Proc<"compute_13", [FeatureCOMPUTE13]>;
|
||||
def : Proc<"compute_20", [FeatureCOMPUTE20]>;
|
||||
def : Proc<"sm_10", [FeatureSM10]>;
|
||||
def : Proc<"sm_11", [FeatureSM11]>;
|
||||
def : Proc<"sm_12", [FeatureSM12]>;
|
||||
def : Proc<"sm_13", [FeatureSM13]>;
|
||||
def : Proc<"sm_20", [FeatureSM20]>;
|
||||
def : Proc<"sm_21", [FeatureSM21]>;
|
||||
def : Proc<"sm_22", [FeatureSM22]>;
|
||||
def : Proc<"sm_23", [FeatureSM23]>;
|
||||
|
||||
// Processor definitions for common GPU architectures
|
||||
def : Proc<"g80", [FeatureSM10]>;
|
||||
def : Proc<"gt200", [FeatureSM13]>;
|
||||
def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
|
||||
def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register File Description
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "PTXRegisterInfo.td"
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction Descriptions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
include "PTXInstrInfo.td"
|
||||
|
||||
def PTXInstrInfo : InstrInfo;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Assembly printer
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PTX uses the MC printer for asm output, so make sure the TableGen
|
||||
// AsmWriter bits get associated with the correct class.
|
||||
def PTXAsmWriter : AsmWriter {
|
||||
string AsmWriterClassName = "InstPrinter";
|
||||
bit isMCAsmWriter = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Target Declaration
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def PTX : Target {
|
||||
let InstructionSet = PTXInstrInfo;
|
||||
let AssemblyWriters = [PTXAsmWriter];
|
||||
}
|
@ -1,561 +0,0 @@
|
||||
//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a printer that converts from our internal representation
|
||||
// of machine-dependent LLVM code to PTX assembly language.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-asm-printer"
|
||||
|
||||
#include "PTXAsmPrinter.h"
|
||||
#include "PTX.h"
|
||||
#include "PTXMachineFunctionInfo.h"
|
||||
#include "PTXParamManager.h"
|
||||
#include "PTXRegisterInfo.h"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/Argument.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/Analysis/DebugInfo.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Target/Mangler.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/Path.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static const char PARAM_PREFIX[] = "__param_";
|
||||
static const char RETURN_PREFIX[] = "__ret_";
|
||||
|
||||
static const char *getRegisterTypeName(unsigned RegType) {
|
||||
switch (RegType) {
|
||||
default:
|
||||
llvm_unreachable("Unknown register type");
|
||||
case PTXRegisterType::Pred:
|
||||
return ".pred";
|
||||
case PTXRegisterType::B16:
|
||||
return ".b16";
|
||||
case PTXRegisterType::B32:
|
||||
return ".b32";
|
||||
case PTXRegisterType::B64:
|
||||
return ".b64";
|
||||
case PTXRegisterType::F32:
|
||||
return ".f32";
|
||||
case PTXRegisterType::F64:
|
||||
return ".f64";
|
||||
}
|
||||
}
|
||||
|
||||
static const char *getStateSpaceName(unsigned addressSpace) {
|
||||
switch (addressSpace) {
|
||||
default: llvm_unreachable("Unknown state space");
|
||||
case PTXStateSpace::Global: return "global";
|
||||
case PTXStateSpace::Constant: return "const";
|
||||
case PTXStateSpace::Local: return "local";
|
||||
case PTXStateSpace::Parameter: return "param";
|
||||
case PTXStateSpace::Shared: return "shared";
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the PTX scalar type suffix for \p type.
///
/// Array and pointer types are peeled down to their element type first.
/// float maps to ".f32", double to ".f64", and 16/32/64-bit integers to
/// ".u16"/".u32"/".u64".  Any other type or integer width reaches
/// llvm_unreachable.  The function never returns a null pointer.
static const char *getTypeName(Type* type) {
  while (true) {
    switch (type->getTypeID()) {
    default: llvm_unreachable("Unknown type");
    case Type::FloatTyID: return ".f32";
    case Type::DoubleTyID: return ".f64";
    case Type::IntegerTyID:
      switch (type->getPrimitiveSizeInBits()) {
      default: llvm_unreachable("Unknown integer bit-width");
      case 16: return ".u16";
      case 32: return ".u32";
      case 64: return ".u64";
      }
    case Type::ArrayTyID:
    case Type::PointerTyID:
      // The type ID has already been checked, so use cast<> (which asserts on
      // a mismatch) instead of dereferencing an unchecked dyn_cast<> result.
      type = cast<SequentialType>(type)->getElementType();
      break; // leaves the switch only; the loop re-examines the element type
    }
  }
}
|
||||
|
||||
/// Runs AsmPrinter::doFinalization() with the module's global-variable list
/// temporarily emptied, then restores the globals in their original order.
/// Returns whatever the base-class call returned.
bool PTXAsmPrinter::doFinalization(Module &M) {
  // XXX Temporarily remove global variables so that doFinalization() will not
  // emit them again (global variables are emitted at beginning).

  Module::GlobalListType &global_list = M.getGlobalList();

  // first, back-up GlobalVariable pointers; the vector owns its storage, so
  // no manual new[]/delete[] pairing is needed
  SmallVector<GlobalVariable *, 16> saved_globals;
  saved_globals.reserve(global_list.size());
  for (Module::global_iterator I = global_list.begin(), E = global_list.end();
       I != E; ++I)
    saved_globals.push_back(&*I);

  // second, empty global_list (remove() unlinks without deleting)
  while (!global_list.empty())
    global_list.remove(global_list.begin());

  // call doFinalization
  bool ret = AsmPrinter::doFinalization(M);

  // now we restore global variables
  for (unsigned i = 0, e = saved_globals.size(); i != e; ++i)
    global_list.insert(global_list.end(), saved_globals[i]);

  return ret;
}
|
||||
|
||||
/// Emits the module prologue: the .version/.target (and, when the subtarget
/// supports PTX 2.3, .address_size) directives, registers a source ID for
/// every compile unit found in the debug info, and then emits declarations
/// for all functions and global variables of \p M.
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
  const PTXSubtarget &Subtarget = TM.getSubtarget<PTXSubtarget>();

  // Emit the PTX .version and .target attributes
  OutStreamer.EmitRawText(Twine("\t.version ") +
                          Subtarget.getPTXVersionString());
  OutStreamer.EmitRawText(Twine("\t.target ") + Subtarget.getTargetString() +
                          (Subtarget.supportsDouble() ? ""
                                                      : ", map_f64_to_f32"));

  // .address_size directive is optional, but it must immediately follow
  // the .target directive if present within a module
  if (Subtarget.supportsPTX23())
    OutStreamer.EmitRawText(Twine("\t.address_size ") +
                            (Subtarget.is64Bit() ? "64" : "32"));

  OutStreamer.AddBlankLine();

  // Define any .file directives
  DebugInfoFinder Finder;
  Finder.processModule(M);

  DebugInfoFinder::iterator CU = Finder.compile_unit_begin();
  const DebugInfoFinder::iterator CUEnd = Finder.compile_unit_end();
  for (; CU != CUEnd; ++CU) {
    DICompileUnit Unit(*CU);
    StringRef FileName = Unit.getFilename();
    StringRef Directory = Unit.getDirectory();
    GetOrCreateSourceID(FileName, Directory);
  }

  OutStreamer.AddBlankLine();

  // declare external functions
  for (Module::const_iterator F = M.begin(), FEnd = M.end(); F != FEnd; ++F)
    EmitFunctionDeclaration(F);

  // declare global variables
  for (Module::const_global_iterator G = M.global_begin(),
                                     GEnd = M.global_end();
       G != GEnd; ++G)
    EmitVariableDeclaration(G);
}
|
||||
|
||||
void PTXAsmPrinter::EmitFunctionBodyStart() {
|
||||
OutStreamer.EmitRawText(Twine("{"));
|
||||
|
||||
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
|
||||
const PTXParamManager &PM = MFI->getParamManager();
|
||||
|
||||
// Print register definitions
|
||||
SmallString<128> regDefs;
|
||||
raw_svector_ostream os(regDefs);
|
||||
unsigned numRegs;
|
||||
|
||||
// pred
|
||||
numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
|
||||
if(numRegs > 0)
|
||||
os << "\t.reg .pred %p<" << numRegs << ">;\n";
|
||||
|
||||
// i16
|
||||
numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
|
||||
if(numRegs > 0)
|
||||
os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
|
||||
|
||||
// i32
|
||||
numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
|
||||
if(numRegs > 0)
|
||||
os << "\t.reg .b32 %r<" << numRegs << ">;\n";
|
||||
|
||||
// i64
|
||||
numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
|
||||
if(numRegs > 0)
|
||||
os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
|
||||
|
||||
// f32
|
||||
numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
|
||||
if(numRegs > 0)
|
||||
os << "\t.reg .f32 %f<" << numRegs << ">;\n";
|
||||
|
||||
// f64
|
||||
numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
|
||||
if(numRegs > 0)
|
||||
os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
|
||||
|
||||
// Local params
|
||||
for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
|
||||
i != e; ++i)
|
||||
os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i)
|
||||
<< ";\n";
|
||||
|
||||
OutStreamer.EmitRawText(os.str());
|
||||
|
||||
|
||||
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
|
||||
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
|
||||
<< " frame object(s)\n");
|
||||
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
|
||||
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
|
||||
if (FrameInfo->getObjectSize(i) > 0) {
|
||||
OutStreamer.EmitRawText("\t.local .align " +
|
||||
Twine(FrameInfo->getObjectAlignment(i)) +
|
||||
" .b8 __local" +
|
||||
Twine(i) +
|
||||
"[" +
|
||||
Twine(FrameInfo->getObjectSize(i)) +
|
||||
"];");
|
||||
}
|
||||
}
|
||||
|
||||
//unsigned Index = 1;
|
||||
// Print parameter passing params
|
||||
//for (PTXMachineFunctionInfo::param_iterator
|
||||
// i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
|
||||
// std::string def = "\t.param .b";
|
||||
// def += utostr(*i);
|
||||
// def += " __ret_";
|
||||
// def += utostr(Index);
|
||||
// Index++;
|
||||
// def += ";";
|
||||
// OutStreamer.EmitRawText(Twine(def));
|
||||
//}
|
||||
}
|
||||
|
||||
/// Emits the closing '}' that matches the '{' written by
/// EmitFunctionBodyStart().
void PTXAsmPrinter::EmitFunctionBodyEnd()
{
  OutStreamer.EmitRawText(Twine("}"));
}
|
||||
|
||||
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
|
||||
MCInst TmpInst;
|
||||
LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
|
||||
OutStreamer.EmitInstruction(TmpInst);
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
|
||||
// Check to see if this is a special global used by LLVM, if so, emit it.
|
||||
if (EmitSpecialLLVMGlobal(gv))
|
||||
return;
|
||||
|
||||
MCSymbol *gvsym = Mang->getSymbol(gv);
|
||||
|
||||
assert(gvsym->isUndefined() && "Cannot define a symbol twice!");
|
||||
|
||||
SmallString<128> decl;
|
||||
raw_svector_ostream os(decl);
|
||||
|
||||
// check if it is defined in some other translation unit
|
||||
if (gv->isDeclaration())
|
||||
os << ".extern ";
|
||||
|
||||
// state space: e.g., .global
|
||||
os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' ';
|
||||
|
||||
// alignment (optional)
|
||||
unsigned alignment = gv->getAlignment();
|
||||
if (alignment != 0)
|
||||
os << ".align " << gv->getAlignment() << ' ';
|
||||
|
||||
|
||||
if (PointerType::classof(gv->getType())) {
|
||||
PointerType* pointerTy = dyn_cast<PointerType>(gv->getType());
|
||||
Type* elementTy = pointerTy->getElementType();
|
||||
|
||||
if (elementTy->isArrayTy()) {
|
||||
assert(elementTy->isArrayTy() && "Only pointers to arrays are supported");
|
||||
|
||||
ArrayType* arrayTy = dyn_cast<ArrayType>(elementTy);
|
||||
elementTy = arrayTy->getElementType();
|
||||
|
||||
unsigned numElements = arrayTy->getNumElements();
|
||||
|
||||
while (elementTy->isArrayTy()) {
|
||||
arrayTy = dyn_cast<ArrayType>(elementTy);
|
||||
elementTy = arrayTy->getElementType();
|
||||
|
||||
numElements *= arrayTy->getNumElements();
|
||||
}
|
||||
|
||||
// FIXME: isPrimitiveType() == false for i16?
|
||||
assert(elementTy->isSingleValueType() &&
|
||||
"Non-primitive types are not handled");
|
||||
|
||||
// Find the size of the element in bits
|
||||
unsigned elementSize = elementTy->getPrimitiveSizeInBits();
|
||||
|
||||
os << ".b" << elementSize << ' ' << gvsym->getName()
|
||||
<< '[' << numElements << ']';
|
||||
} else {
|
||||
os << ".b8" << gvsym->getName() << "[]";
|
||||
}
|
||||
|
||||
// handle string constants (assume ConstantArray means string)
|
||||
if (gv->hasInitializer()) {
|
||||
const Constant *C = gv->getInitializer();
|
||||
if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
|
||||
os << " = {";
|
||||
|
||||
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
|
||||
if (i > 0)
|
||||
os << ',';
|
||||
|
||||
os << "0x";
|
||||
os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
|
||||
}
|
||||
|
||||
os << '}';
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Note: this is currently the fall-through case and most likely generates
|
||||
// incorrect code.
|
||||
os << getTypeName(gv->getType()) << ' ' << gvsym->getName();
|
||||
|
||||
if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType()))
|
||||
os << "[]";
|
||||
}
|
||||
|
||||
os << ';';
|
||||
|
||||
OutStreamer.EmitRawText(os.str());
|
||||
OutStreamer.AddBlankLine();
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::EmitFunctionEntryLabel() {
|
||||
// The function label could have already been emitted if two symbols end up
|
||||
// conflicting due to asm renaming. Detect this and emit an error.
|
||||
if (!CurrentFnSym->isUndefined())
|
||||
report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
|
||||
"' label emitted multiple times to assembly file");
|
||||
|
||||
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
|
||||
const PTXParamManager &PM = MFI->getParamManager();
|
||||
const bool isKernel = MFI->isKernel();
|
||||
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
|
||||
|
||||
SmallString<128> decl;
|
||||
raw_svector_ostream os(decl);
|
||||
os << (isKernel ? ".entry" : ".func");
|
||||
|
||||
if (!isKernel) {
|
||||
os << " (";
|
||||
if (ST.useParamSpaceForDeviceArgs()) {
|
||||
for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
|
||||
b = i; i != e; ++i) {
|
||||
if (i != b)
|
||||
os << ", ";
|
||||
|
||||
os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
|
||||
}
|
||||
} else {
|
||||
for (PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
|
||||
i != e; ++i) {
|
||||
if (i != b)
|
||||
os << ", ";
|
||||
|
||||
os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
|
||||
<< MFI->getRegisterName(*i);
|
||||
}
|
||||
}
|
||||
os << ')';
|
||||
}
|
||||
|
||||
// Print function name
|
||||
os << ' ' << CurrentFnSym->getName() << " (";
|
||||
|
||||
const Function *F = MF->getFunction();
|
||||
|
||||
// Print parameters
|
||||
if (isKernel || ST.useParamSpaceForDeviceArgs()) {
|
||||
/*for (PTXParamManager::param_iterator i = PM.arg_begin(), e = PM.arg_end(),
|
||||
b = i; i != e; ++i) {
|
||||
if (i != b)
|
||||
os << ", ";
|
||||
|
||||
os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
|
||||
}*/
|
||||
int Counter = 1;
|
||||
for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
|
||||
b = i; i != e; ++i) {
|
||||
if (i != b)
|
||||
os << ", ";
|
||||
const Type *ArgType = (*i).getType();
|
||||
os << ".param .b";
|
||||
if (ArgType->isPointerTy()) {
|
||||
if (ST.is64Bit())
|
||||
os << "64";
|
||||
else
|
||||
os << "32";
|
||||
} else {
|
||||
os << ArgType->getPrimitiveSizeInBits();
|
||||
}
|
||||
if (ArgType->isPointerTy() && ST.emitPtrAttribute()) {
|
||||
const PointerType *PtrType = dyn_cast<const PointerType>(ArgType);
|
||||
os << " .ptr";
|
||||
switch (PtrType->getAddressSpace()) {
|
||||
default:
|
||||
llvm_unreachable("Unknown address space in argument");
|
||||
case PTXStateSpace::Global:
|
||||
os << " .global";
|
||||
break;
|
||||
case PTXStateSpace::Shared:
|
||||
os << " .shared";
|
||||
break;
|
||||
}
|
||||
}
|
||||
os << " __param_" << Counter++;
|
||||
}
|
||||
} else {
|
||||
for (PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
|
||||
i != e; ++i) {
|
||||
if (i != b)
|
||||
os << ", ";
|
||||
|
||||
os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
|
||||
<< MFI->getRegisterName(*i);
|
||||
}
|
||||
}
|
||||
os << ')';
|
||||
|
||||
OutStreamer.EmitRawText(os.str());
|
||||
}
|
||||
|
||||
void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func)
|
||||
{
|
||||
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
|
||||
|
||||
std::string decl = "";
|
||||
|
||||
// hard-coded emission of extern vprintf function
|
||||
|
||||
if (func->getName() == "printf" || func->getName() == "puts") {
|
||||
decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b";
|
||||
if (ST.is64Bit())
|
||||
decl += "64";
|
||||
else
|
||||
decl += "32";
|
||||
decl += " __param_2, .param .b";
|
||||
if (ST.is64Bit())
|
||||
decl += "64";
|
||||
else
|
||||
decl += "32";
|
||||
decl += " __param_3)\n";
|
||||
}
|
||||
|
||||
OutStreamer.EmitRawText(Twine(decl));
|
||||
}
|
||||
|
||||
/// Return the numeric id associated with a source file, assigning a new one
/// (and emitting the matching .file directive) the first time a file is seen.
///
/// An empty \p FileName is treated as "<stdin>".  A relative \p FileName with
/// a non-empty \p DirName is first joined into a full path, and the lookup is
/// then repeated on that path.
unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
                                            StringRef DirName) {
  // If FE did not provide a file name, then assume stdin.
  if (FileName.empty())
    return GetOrCreateSourceID("<stdin>", StringRef());

  // MCStream expects full path name as filename.
  const bool NeedsDirPrefix =
      !DirName.empty() && !sys::path::is_absolute(FileName);
  if (NeedsDirPrefix) {
    SmallString<128> Joined = DirName;
    sys::path::append(Joined, FileName);
    // The recursive call copies Joined into the StringMap, so handing out a
    // reference to this local buffer is fine.
    return GetOrCreateSourceID(StringRef(Joined), StringRef());
  }

  StringMapEntry<unsigned> &Slot = SourceIdMap.GetOrCreateValue(FileName);

  // A non-zero value means the file already has an id.
  if (unsigned Known = Slot.getValue())
    return Known;

  // The entry was just inserted, so the map size is the next unused id.
  const unsigned NewId = SourceIdMap.size();
  Slot.setValue(NewId);

  // Print out a .file directive to specify files for .loc directives.
  OutStreamer.EmitDwarfFileDirective(NewId, "", Slot.getKey());

  return NewId;
}
|
||||
|
||||
/// Build an expression operand that refers to \p Symbol with no variant kind.
/// \p MO is accepted for interface compatibility but is not consulted.
MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
                                      const MCSymbol *Symbol) {
  const MCExpr *SymbolExpr =
      MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
  return MCOperand::CreateExpr(SymbolExpr);
}
|
||||
|
||||
/// Translate a machine operand into the corresponding MC operand.
///
/// Registers are passed through PTXMachineFunctionInfo::getEncodedRegister
/// (register 0 stays 0), immediates are copied, basic blocks / globals /
/// external symbols become symbol-reference expressions, and FP immediates
/// are converted to IEEE double (rounding toward zero) first.  Any other
/// operand kind is unreachable.
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
  const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();

  switch (MO.getType()) {
  default:
    llvm_unreachable("Unknown operand type");

  case MachineOperand::MO_Register: {
    // Encode the register; the null register (0) is passed through as 0.
    unsigned Encoded = 0;
    if (MO.getReg() > 0)
      Encoded = MFI->getEncodedRegister(MO.getReg());
    return MCOperand::CreateReg(Encoded);
  }

  case MachineOperand::MO_Immediate:
    return MCOperand::CreateImm(MO.getImm());

  case MachineOperand::MO_MachineBasicBlock:
    return MCOperand::CreateExpr(
        MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), OutContext));

  case MachineOperand::MO_GlobalAddress:
    return GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));

  case MachineOperand::MO_ExternalSymbol:
    return GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));

  case MachineOperand::MO_FPImmediate: {
    APFloat AsDouble = MO.getFPImm()->getValueAPF();
    bool LosesInfo; // required by convert(); the result is not inspected
    AsDouble.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
    return MCOperand::CreateFPImm(AsDouble.convertToDouble());
  }
  }
}
|
||||
|
||||
// Force static initialization.
|
||||
extern "C" void LLVMInitializePTXAsmPrinter() {
|
||||
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
|
||||
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
|
||||
}
|
@ -1,57 +0,0 @@
|
||||
//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// PTX Assembly printer class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTXASMPRINTER_H
|
||||
#define PTXASMPRINTER_H
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/Support/Compiler.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class MCOperand;
|
||||
|
||||
/// AsmPrinter specialisation that writes machine code as PTX text.
class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
public:
  explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
    : AsmPrinter(TM, Streamer) {}

  /// Human-readable name of this pass.
  const char *getPassName() const { return "PTX Assembly Printer"; }

  bool doFinalization(Module &M);

  // AsmPrinter emission hooks.
  virtual void EmitStartOfAsmFile(Module &M);
  virtual void EmitFunctionBodyStart();
  virtual void EmitFunctionBodyEnd();
  virtual void EmitFunctionEntryLabel();
  virtual void EmitInstruction(const MachineInstr *MI);

  /// Look up (or create) the numeric id of a source file given its name and
  /// directory.
  unsigned GetOrCreateSourceID(StringRef FileName,
                               StringRef DirName);

  /// Wrap \p Symbol in a symbol-reference expression operand.
  MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);

  /// Convert a machine operand to its MC operand form.
  MCOperand lowerOperand(const MachineOperand &MO);

private:
  void EmitVariableDeclaration(const GlobalVariable *gv);
  void EmitFunctionDeclaration(const Function* func);

  /// Source file name -> id assigned by GetOrCreateSourceID.
  StringMap<unsigned> SourceIdMap;
}; // class PTXAsmPrinter
|
||||
} // namespace llvm
|
||||
|
||||
#endif
|
||||
|
@ -1,181 +0,0 @@
|
||||
//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines a machine function pass that sets appropriate FP rounding
|
||||
// modes for all relevant instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-fp-rounding-mode"
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// NOTE: PTXFPRoundingModePass should be executed just before emission.
|
||||
|
||||
namespace {
|
||||
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
|
||||
/// all FP instructions. Essentially, this pass just looks for all FP
|
||||
/// instructions that have a rounding mode set to RndDefault, and sets an
|
||||
/// appropriate rounding mode based on the target device.
|
||||
///
|
||||
class PTXFPRoundingModePass : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
|
||||
typedef std::pair<unsigned, unsigned> RndModeDesc;
|
||||
|
||||
PTXTargetMachine& TargetMachine;
|
||||
DenseMap<unsigned, RndModeDesc> Instrs;
|
||||
|
||||
public:
|
||||
PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
|
||||
: MachineFunctionPass(ID),
|
||||
TargetMachine(TM) {
|
||||
initializeMap();
|
||||
}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "PTX FP Rounding Mode Pass";
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
void initializeMap();
|
||||
void processInstruction(MachineInstr &MI);
|
||||
}; // class PTXFPRoundingModePass
|
||||
} // end anonymous namespace
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
char PTXFPRoundingModePass::ID = 0;
|
||||
|
||||
/// Visit every instruction of every basic block in \p MF and hand it to
/// processInstruction().
///
/// NOTE(review): this always returns false, although processInstruction() may
/// rewrite an operand immediate -- confirm whether the pass should report a
/// modification.
bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
  for (MachineFunction::iterator Block = MF.begin(), BlockEnd = MF.end();
       Block != BlockEnd; ++Block)
    for (MachineBasicBlock::iterator Inst = Block->begin(),
                                     InstEnd = Block->end();
         Inst != InstEnd; ++Inst)
      processInstruction(*Inst);

  return false;
}
|
||||
|
||||
void PTXFPRoundingModePass::initializeMap() {
|
||||
using namespace PTXRoundingMode;
|
||||
const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
|
||||
|
||||
// Build a map of default rounding mode for all instructions that need a
|
||||
// rounding mode.
|
||||
Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
|
||||
Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
|
||||
Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
|
||||
Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
|
||||
Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
|
||||
|
||||
unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
|
||||
Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
|
||||
Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
|
||||
Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
|
||||
Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
|
||||
|
||||
unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
|
||||
Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
|
||||
Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
|
||||
|
||||
Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
|
||||
Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
|
||||
|
||||
Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
|
||||
|
||||
Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
|
||||
}
|
||||
|
||||
/// If \p MI's opcode is in the table and its rounding-mode operand is still
/// RndDefault, replace that operand with the opcode's default mode.
void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
  // Is this an instruction that needs a rounding mode?
  DenseMap<unsigned, RndModeDesc>::iterator Found =
      Instrs.find(MI.getOpcode());
  if (Found == Instrs.end())
    return;

  const RndModeDesc &Desc = Found->second;

  // Desc.first is the index of the rounding-mode operand.
  MachineOperand &RndOperand = MI.getOperand(Desc.first);

  // Only an unset (default) rounding mode is overwritten.
  if (RndOperand.getImm() == PTXRoundingMode::RndDefault)
    RndOperand.setImm(Desc.second);
}
|
||||
|
||||
/// Factory: allocate a new PTXFPRoundingModePass for \p TM.  The caller
/// receives the raw pointer returned by new.
FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
                                                CodeGenOpt::Level OptLevel) {
  FunctionPass *Pass = new PTXFPRoundingModePass(TM, OptLevel);
  return Pass;
}
|
||||
|
@ -1,24 +0,0 @@
|
||||
//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the PTX implementation of TargetFrameLowering class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXFrameLowering.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
/// Prologue hook; no code is inserted.
void PTXFrameLowering::emitPrologue(MachineFunction &MF) const
{
}
|
||||
|
||||
/// Epilogue hook; no code is inserted.
void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const
{
}
|
@ -1,44 +0,0 @@
|
||||
//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the PTX implementation of the TargetFrameLowering class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_FRAMEINFO_H
|
||||
#define PTX_FRAMEINFO_H
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXSubtarget.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
class PTXSubtarget;
|
||||
|
||||
class PTXFrameLowering : public TargetFrameLowering {
|
||||
protected:
|
||||
const PTXSubtarget &STI;
|
||||
|
||||
public:
|
||||
explicit PTXFrameLowering(const PTXSubtarget &sti)
|
||||
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
|
||||
STI(sti) {
|
||||
}
|
||||
|
||||
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
|
||||
/// the function.
|
||||
void emitPrologue(MachineFunction &MF) const;
|
||||
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
|
||||
bool hasFP(const MachineFunction &MF) const { return false; }
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif
|
@ -1,356 +0,0 @@
|
||||
//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines an instruction selector for the PTX target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXMachineFunctionInfo.h"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
// PTXDAGToDAGISel - Instruction selector that turns SelectionDAG nodes into
// PTX machine instructions.
class PTXDAGToDAGISel : public SelectionDAGISel {
  public:
    PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);

    virtual const char *getPassName() const {
      return "PTX DAG->DAG Pattern Instruction Selection";
    }

    // Entry point: select a machine node for Node.
    SDNode *Select(SDNode *Node);

    // Complex Pattern Selectors.  These must be declared before the
    // generated matcher below is included.
    bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
    bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
    bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
    bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);

    // Include the pieces auto'gened from the target description
#include "PTXGenDAGISel.inc"

  private:
    // We need this only because we can't match instruction BRAdp
    // pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
    SDNode *SelectBRCOND(SDNode *Node);

    // Hand-written selection for nodes handled outside the generated matcher.
    SDNode *SelectREADPARAM(SDNode *Node);
    SDNode *SelectWRITEPARAM(SDNode *Node);
    SDNode *SelectFrameIndex(SDNode *Node);

    // Immediate-operand helpers.
    bool isImm(const SDValue &operand);
    bool SelectImm(const SDValue &operand, SDValue &imm);

    const PTXSubtarget& getSubtarget() const;
}; // class PTXDAGToDAGISel
|
||||
} // namespace
|
||||
|
||||
// createPTXISelDag - This pass converts a legalized DAG into a
|
||||
// PTX-specific DAG, ready for instruction scheduling
|
||||
FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
return new PTXDAGToDAGISel(TM, OptLevel);
|
||||
}
|
||||
|
||||
// Forward both arguments to the SelectionDAGISel base; no other state is set.
PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
                                 CodeGenOpt::Level OptLevel)
  : SelectionDAGISel(TM, OptLevel)
{
}
|
||||
|
||||
// Dispatch on the node's opcode: BRCOND, READ_PARAM, WRITE_PARAM and
// FrameIndex have hand-written selectors; everything else goes to the
// generated SelectCode().
SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
  const unsigned Opcode = Node->getOpcode();

  if (Opcode == ISD::BRCOND)
    return SelectBRCOND(Node);
  if (Opcode == PTXISD::READ_PARAM)
    return SelectREADPARAM(Node);
  if (Opcode == PTXISD::WRITE_PARAM)
    return SelectWRITEPARAM(Node);
  if (Opcode == ISD::FrameIndex)
    return SelectFrameIndex(Node);

  return SelectCode(Node);
}
|
||||
|
||||
// Select a conditional branch as a BRAdp machine node.  Operands of the
// incoming node are (chain, predicate, target block); the predicate must be
// an i1 and the target a BasicBlock node.
SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
  assert(Node->getNumOperands() >= 3);

  SDValue InChain   = Node->getOperand(0);
  SDValue Condition = Node->getOperand(1);
  SDValue Dest      = Node->getOperand(2); // branch target
  SDValue PredKind  = CurDAG->getTargetConstant(PTXPredicate::Normal, MVT::i32);
  DebugLoc Loc      = Node->getDebugLoc();

  assert(Dest.getOpcode() == ISD::BasicBlock);
  assert(Condition.getValueType() == MVT::i1);

  // Emit BRAdp
  SDValue BranchOps[] = { Dest, Condition, PredKind, InChain };
  return CurDAG->getMachineNode(PTX::BRAdp, Loc, MVT::Other, BranchOps, 4);
}
|
||||
|
||||
// Select a READ_PARAM node: pick the READPARAM* opcode that matches the
// node's result type and build an unpredicated machine node from
// (index, no-register predicate, PTXPredicate::None, chain).
SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
  SDValue InChain    = Node->getOperand(0);
  SDValue ParamIndex = Node->getOperand(1);

  // Get the type of parameter we are reading
  EVT VT = Node->getValueType(0);
  assert(VT.isSimple() && "READ_PARAM only implemented for MVT types");

  MVT ParamTy = VT.getSimpleVT();

  int OpCode;
  switch (ParamTy.SimpleTy) {
  case MVT::i1:  OpCode = PTX::READPARAMPRED; break;
  case MVT::i16: OpCode = PTX::READPARAMI16;  break;
  case MVT::i32: OpCode = PTX::READPARAMI32;  break;
  case MVT::i64: OpCode = PTX::READPARAMI64;  break;
  case MVT::f32: OpCode = PTX::READPARAMF32;  break;
  default:
    // Anything left is expected to be f64.
    assert(ParamTy == MVT::f64 && "Unexpected type!");
    OpCode = PTX::READPARAMF64;
    break;
  }

  SDValue NoPredReg = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredKind  = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
  DebugLoc Loc      = Node->getDebugLoc();

  SDValue ReadOps[] = { ParamIndex, NoPredReg, PredKind, InChain };
  return CurDAG->getMachineNode(OpCode, Loc, VT, ReadOps, 4);
}
|
||||
|
||||
// Select a WRITE_PARAM node: pick the WRITEPARAM* opcode that matches the
// type of the value being written and build an unpredicated machine node
// from (value, no-register predicate, PTXPredicate::None, chain).
SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
  SDValue InChain = Node->getOperand(0);
  SDValue Value   = Node->getOperand(1);

  // Get the type of parameter we are writing
  EVT VT = Value->getValueType(0);
  assert(VT.isSimple() && "WRITE_PARAM only implemented for MVT types");

  MVT ParamTy = VT.getSimpleVT();

  int OpCode;
  switch (ParamTy.SimpleTy) {
  case MVT::i1:  OpCode = PTX::WRITEPARAMPRED; break;
  case MVT::i16: OpCode = PTX::WRITEPARAMI16;  break;
  case MVT::i32: OpCode = PTX::WRITEPARAMI32;  break;
  case MVT::i64: OpCode = PTX::WRITEPARAMI64;  break;
  case MVT::f32: OpCode = PTX::WRITEPARAMF32;  break;
  case MVT::f64: OpCode = PTX::WRITEPARAMF64;  break;
  default:
    llvm_unreachable("Invalid type in SelectWRITEPARAM");
  }

  SDValue NoPredReg = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredKind  = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);
  DebugLoc Loc      = Node->getDebugLoc();

  SDValue WriteOps[] = { Value, NoPredReg, PredKind, InChain };
  return CurDAG->getMachineNode(OpCode, Loc, MVT::Other, WriteOps, 4);
}
|
||||
|
||||
// Replace a FrameIndex node by a target external symbol whose name is the
// frame symbol PTXMachineFunctionInfo reports for that index; the symbol
// node keeps the value type of the original node.
SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
  const int FrameIdx = cast<FrameIndexSDNode>(Node)->getIndex();

  PTXMachineFunctionInfo *FuncInfo = MF->getInfo<PTXMachineFunctionInfo>();

  SDValue Symbol =
      CurDAG->getTargetExternalSymbol(FuncInfo->getFrameSymbol(FrameIdx),
                                      Node->getValueType(0));

  return Symbol.getNode();
}
|
||||
|
||||
// Match memory operand of the form [reg+reg]
|
||||
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
|
||||
if (Addr.getOpcode() != ISD::ADD || Addr.getNumOperands() < 2 ||
|
||||
isImm(Addr.getOperand(0)) || isImm(Addr.getOperand(1)))
|
||||
return false;
|
||||
|
||||
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
||||
|
||||
R1 = Addr;
|
||||
R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
|
||||
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
|
||||
SDValue &Offset) {
|
||||
// FrameIndex addresses are handled separately
|
||||
//errs() << "SelectADDRri: ";
|
||||
//Addr.getNode()->dumpr();
|
||||
if (isa<FrameIndexSDNode>(Addr)) {
|
||||
//errs() << "Failure\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
if (CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||
Base = Addr.getOperand(0);
|
||||
if (isa<FrameIndexSDNode>(Base)) {
|
||||
//errs() << "Failure\n";
|
||||
return false;
|
||||
}
|
||||
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
|
||||
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
|
||||
//errs() << "Success\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
/*if (Addr.getNumOperands() == 1) {
|
||||
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
errs() << "Success\n";
|
||||
return true;
|
||||
}*/
|
||||
|
||||
//errs() << "SelectADDRri fails on: ";
|
||||
//Addr.getNode()->dumpr();
|
||||
|
||||
if (isImm(Addr)) {
|
||||
//errs() << "Failure\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
|
||||
//errs() << "Success\n";
|
||||
return true;
|
||||
|
||||
/*if (Addr.getOpcode() != ISD::ADD) {
|
||||
// let SelectADDRii handle the [imm] case
|
||||
if (isImm(Addr))
|
||||
return false;
|
||||
// it is [reg]
|
||||
|
||||
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
||||
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
if (Addr.getNumOperands() < 2)
|
||||
return false;
|
||||
|
||||
// let SelectADDRii handle the [imm+imm] case
|
||||
if (isImm(Addr.getOperand(0)) && isImm(Addr.getOperand(1)))
|
||||
return false;
|
||||
|
||||
// try [reg+imm] and [imm+reg]
|
||||
for (int i = 0; i < 2; i ++)
|
||||
if (SelectImm(Addr.getOperand(1-i), Offset)) {
|
||||
Base = Addr.getOperand(i);
|
||||
return true;
|
||||
}
|
||||
|
||||
// neither [reg+imm] nor [imm+reg]
|
||||
return false;*/
|
||||
}
|
||||
|
||||
// Match memory operand of the form [imm+imm] and [imm]
|
||||
bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
|
||||
SDValue &Offset) {
|
||||
// is [imm+imm]?
|
||||
if (Addr.getOpcode() == ISD::ADD) {
|
||||
return SelectImm(Addr.getOperand(0), Base) &&
|
||||
SelectImm(Addr.getOperand(1), Offset);
|
||||
}
|
||||
|
||||
// is [imm]?
|
||||
if (SelectImm(Addr, Base)) {
|
||||
assert(Addr.getValueType().isSimple() && "Type must be simple");
|
||||
|
||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
|
||||
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
|
||||
SDValue &Offset) {
|
||||
//errs() << "SelectADDRlocal: ";
|
||||
//Addr.getNode()->dumpr();
|
||||
if (isa<FrameIndexSDNode>(Addr)) {
|
||||
Base = Addr;
|
||||
Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
|
||||
//errs() << "Success\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
if (CurDAG->isBaseWithConstantOffset(Addr)) {
|
||||
Base = Addr.getOperand(0);
|
||||
if (!isa<FrameIndexSDNode>(Base)) {
|
||||
//errs() << "Failure\n";
|
||||
return false;
|
||||
}
|
||||
ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1));
|
||||
Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
|
||||
//errs() << "Offset: ";
|
||||
//Offset.getNode()->dumpr();
|
||||
//errs() << "Success\n";
|
||||
return true;
|
||||
}
|
||||
|
||||
//errs() << "Failure\n";
|
||||
return false;
|
||||
}
|
||||
|
||||
bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
|
||||
return ConstantSDNode::classof(operand.getNode());
|
||||
}
|
||||
|
||||
bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
|
||||
SDNode *node = operand.getNode();
|
||||
if (!ConstantSDNode::classof(node))
|
||||
return false;
|
||||
|
||||
ConstantSDNode *CN = cast<ConstantSDNode>(node);
|
||||
imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
|
||||
operand.getValueType());
|
||||
return true;
|
||||
}
|
||||
|
||||
// Convenience accessor for the PTX subtarget of the target machine this
// selector was created for.
const PTXSubtarget &PTXDAGToDAGISel::getSubtarget() const {
  const PTXSubtarget &Subtarget = TM.getSubtarget<PTXSubtarget>();
  return Subtarget;
}
|
||||
|
@ -1,516 +0,0 @@
|
||||
//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the PTXTargetLowering class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXISelLowering.h"
|
||||
#include "PTX.h"
|
||||
#include "PTXMachineFunctionInfo.h"
|
||||
#include "PTXRegisterInfo.h"
|
||||
#include "PTXSubtarget.h"
|
||||
#include "llvm/Function.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/CodeGen/CallingConvLower.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// TargetLowering Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Configure the lowering tables for PTX: register classes per legal type,
// boolean representation, mem* inlining limits, and the legalize action for
// each operation that needs something other than the default.
PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
  : TargetLowering(TM, new TargetLoweringObjectFileELF()) {
  // One register class per legal value type.
  addRegisterClass(MVT::i1,  &PTX::RegPredRegClass);
  addRegisterClass(MVT::i16, &PTX::RegI16RegClass);
  addRegisterClass(MVT::i32, &PTX::RegI32RegClass);
  addRegisterClass(MVT::i64, &PTX::RegI64RegClass);
  addRegisterClass(MVT::f32, &PTX::RegF32RegClass);
  addRegisterClass(MVT::f64, &PTX::RegF64RegClass);

  setBooleanContents(ZeroOrOneBooleanContent);
  setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
  setMinFunctionAlignment(2);

  // Let LLVM use loads/stores for all mem* operations.
  const unsigned InlineMemOpLimit = 4096;
  maxStoresPerMemcpy  = InlineMemOpLimit;
  maxStoresPerMemmove = InlineMemOpLimit;
  maxStoresPerMemset  = InlineMemOpLimit;

  //===--- Expand ---------------------------------------------------------===//

  // (any/zero/sign) extload of i16 => load + (any/zero/sign) extend
  static const ISD::LoadExtType I16ExtLoads[] = {
    ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD
  };
  for (unsigned K = 0; K != sizeof(I16ExtLoads) / sizeof(I16ExtLoads[0]); ++K)
    setLoadExtAction(I16ExtLoads[K], MVT::i16, Expand);

  // f32 extload => load + fextend
  setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);

  // f64 truncstore => trunc + store
  setTruncStoreAction(MVT::f64, MVT::f32, Expand);

  // sign_extend_inreg => sign_extend
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  // br_cc => brcond
  setOperationAction(ISD::BR_CC, MVT::Other, Expand);

  // select_cc => setcc
  static const MVT::SimpleValueType SelectCCTypes[] = {
    MVT::Other, MVT::f32, MVT::f64
  };
  for (unsigned K = 0; K != sizeof(SelectCCTypes) / sizeof(SelectCCTypes[0]);
       ++K)
    setOperationAction(ISD::SELECT_CC, SelectCCTypes[K], Expand);

  //===--- Legal ----------------------------------------------------------===//

  static const MVT::SimpleValueType FPTypes[] = { MVT::f32, MVT::f64 };
  for (unsigned K = 0; K != sizeof(FPTypes) / sizeof(FPTypes[0]); ++K)
    setOperationAction(ISD::ConstantFP, FPTypes[K], Legal);

  // SETCC on i1 is left Legal; custom lowering to bitwise logic is currently
  // not enabled.
  setOperationAction(ISD::SETCC, MVT::i1, Legal);

  //===--- Custom ---------------------------------------------------------===//

  // Customize translation of memory addresses.
  static const MVT::SimpleValueType AddressTypes[] = { MVT::i32, MVT::i64 };
  for (unsigned K = 0; K != sizeof(AddressTypes) / sizeof(AddressTypes[0]);
       ++K)
    setOperationAction(ISD::GlobalAddress, AddressTypes[K], Custom);

  // Compute derived properties from the register classes.
  computeRegisterProperties();
}
|
||||
|
||||
// Comparisons always produce a 1-bit result, regardless of the operand type
// passed in.
EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
  (void)VT; // The operand type does not influence the result type.
  return EVT(MVT::i1);
}
|
||||
|
||||
// Dispatch a node marked for custom lowering to its handler. Only SETCC and
// GlobalAddress are handled; any other opcode is a fatal error.
SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  const unsigned Opcode = Op.getOpcode();
  switch (Opcode) {
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::SETCC:
    return LowerSETCC(Op, DAG);
  default:
    llvm_unreachable("Unimplemented operand");
  }
}
|
||||
|
||||
const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
switch (Opcode) {
|
||||
default:
|
||||
llvm_unreachable("Unknown opcode");
|
||||
case PTXISD::COPY_ADDRESS:
|
||||
return "PTXISD::COPY_ADDRESS";
|
||||
case PTXISD::LOAD_PARAM:
|
||||
return "PTXISD::LOAD_PARAM";
|
||||
case PTXISD::STORE_PARAM:
|
||||
return "PTXISD::STORE_PARAM";
|
||||
case PTXISD::READ_PARAM:
|
||||
return "PTXISD::READ_PARAM";
|
||||
case PTXISD::WRITE_PARAM:
|
||||
return "PTXISD::WRITE_PARAM";
|
||||
case PTXISD::EXIT:
|
||||
return "PTXISD::EXIT";
|
||||
case PTXISD::RET:
|
||||
return "PTXISD::RET";
|
||||
case PTXISD::CALL:
|
||||
return "PTXISD::CALL";
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Custom Lower Operation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Custom-lowering hook for SETCC.
//
// No custom lowering is implemented: reaching this function means a SETCC
// node was not matched by an instruction-selection pattern, which is treated
// as a fatal error. The operands of Op and the DAG are therefore not used.
SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");

  llvm_unreachable("setcc was not matched by a pattern!");
}
|
||||
|
||||
// Lower a GlobalAddress node to a PTXISD::COPY_ADDRESS node whose single
// operand is the corresponding target global address, both typed with the
// target pointer type.
SDValue PTXTargetLowering::
LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  const EVT AddrTy = getPointerTy();
  const DebugLoc Loc = Op.getDebugLoc();
  const GlobalValue *Global = cast<GlobalAddressSDNode>(Op)->getGlobal();

  assert(AddrTy.isSimple() && "Pointer must be to primitive type.");

  SDValue TargetAddr = DAG.getTargetGlobalAddress(Global, Loc, AddrTy);
  return DAG.getNode(PTXISD::COPY_ADDRESS, Loc, AddrTy.getSimpleVT(),
                     TargetAddr);
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Calling Convention Implementation
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Lower the incoming formal arguments of a function.
//
// The calling convention decides whether the function is recorded as a
// kernel (PTX_Kernel) or a device function (PTX_Device); any other
// convention, and any vararg function, is a fatal error.
//
// Arguments are then materialised in one of two ways:
//   * kernel, or subtarget uses param space for device arguments:
//       one LOAD_PARAM node per argument, reading a newly added argument
//       parameter;
//   * otherwise:
//       one READ_PARAM node per argument, plus a fresh virtual register that
//       is recorded in the function info as an argument register.
//
// One value per entry of Ins is appended to InVals; the incoming Chain is
// returned unchanged.
SDValue PTXTargetLowering::
LowerFormalArguments(SDValue Chain,
                     CallingConv::ID CallConv,
                     bool isVarArg,
                     const SmallVectorImpl<ISD::InputArg> &Ins,
                     DebugLoc dl,
                     SelectionDAG &DAG,
                     SmallVectorImpl<SDValue> &InVals) const {
  if (isVarArg)
    llvm_unreachable("PTX does not support varargs");

  MachineFunction &MF = DAG.getMachineFunction();
  const PTXSubtarget &Subtarget =
      getTargetMachine().getSubtarget<PTXSubtarget>();
  PTXMachineFunctionInfo *FuncInfo = MF.getInfo<PTXMachineFunctionInfo>();
  PTXParamManager &Params = FuncInfo->getParamManager();

  // Record whether this is a kernel entry point or a device function.
  switch (CallConv) {
  case CallingConv::PTX_Kernel:
    FuncInfo->setKernel(true);
    break;
  case CallingConv::PTX_Device:
    FuncInfo->setKernel(false);
    break;
  default:
    llvm_unreachable("Unsupported calling convention");
  }

  const unsigned NumArgs = Ins.size();

  // We do one of two things here:
  //   IsKernel || SM >= 2.0  ->  Use param space for arguments
  //   SM < 2.0               ->  Use registers for arguments
  if (FuncInfo->isKernel() || Subtarget.useParamSpaceForDeviceArgs()) {
    // Param-space path: just emit the proper LOAD_PARAM nodes.
    for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
      assert((!FuncInfo->isKernel() || Ins[Idx].VT != MVT::i1) &&
             "Kernels cannot take pred operands");

      const unsigned SizeInBits = Ins[Idx].VT.getStoreSizeInBits();
      const unsigned ParamIdx = Params.addArgumentParam(SizeInBits);
      const std::string &Name = Params.getParamName(ParamIdx);
      SDValue Symbol = DAG.getTargetExternalSymbol(Name.c_str(), MVT::Other);

      InVals.push_back(DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[Idx].VT, Chain,
                                   Symbol));
    }
    return Chain;
  }

  // Register path: value type -> PTX register type recorded for the argument.
  static const struct {
    MVT::SimpleValueType VT;
    unsigned Kind;
  } RegKinds[] = {
    { MVT::i1,  PTXRegisterType::Pred },
    { MVT::i16, PTXRegisterType::B16  },
    { MVT::i32, PTXRegisterType::B32  },
    { MVT::i64, PTXRegisterType::B64  },
    { MVT::f32, PTXRegisterType::F32  },
    { MVT::f64, PTXRegisterType::F64  }
  };
  const unsigned NumRegKinds = sizeof(RegKinds) / sizeof(RegKinds[0]);

  for (unsigned Idx = 0; Idx != NumArgs; ++Idx) {
    EVT ArgVT = Ins[Idx].VT;
    const TargetRegisterClass *RC = getRegClassFor(ArgVT);

    // Determine which register type we need.
    unsigned RegKind = 0;
    bool Known = false;
    for (unsigned K = 0; K != NumRegKinds && !Known; ++K) {
      if (ArgVT == EVT(RegKinds[K].VT)) {
        RegKind = RegKinds[K].Kind;
        Known = true;
      }
    }
    if (!Known)
      llvm_unreachable("Unknown parameter type");

    // Use a unique index in the instruction to prevent instruction folding.
    // Yes, this is a hack.
    SDValue ArgIndex = DAG.getTargetConstant(Idx, MVT::i32);
    const unsigned VReg = MF.getRegInfo().createVirtualRegister(RC);

    InVals.push_back(DAG.getNode(PTXISD::READ_PARAM, dl, ArgVT, Chain,
                                 ArgIndex));

    FuncInfo->addRegister(VReg, RegKind, PTXRegisterSpace::Argument);
  }

  return Chain;
}
|
||||
|
||||
// Lower a function return.
//
//   * PTX_Kernel: must return void; emits PTXISD::EXIT on the chain.
//   * PTX_Device: may return at most one value; emits PTXISD::RET after
//     storing the value, either
//       - into a newly added return parameter via STORE_PARAM, when the
//         subtarget uses param space for device arguments, or
//       - into a fresh virtual register via CopyToReg + WRITE_PARAM, with
//         the register recorded in the function info as a return register.
//
// Varargs and any other calling convention are fatal errors.
SDValue PTXTargetLowering::
LowerReturn(SDValue Chain,
            CallingConv::ID CallConv,
            bool isVarArg,
            const SmallVectorImpl<ISD::OutputArg> &Outs,
            const SmallVectorImpl<SDValue> &OutVals,
            DebugLoc dl,
            SelectionDAG &DAG) const {
  if (isVarArg) llvm_unreachable("PTX does not support varargs");

  switch (CallConv) {
    default:
      llvm_unreachable("Unsupported calling convention.");
    case CallingConv::PTX_Kernel:
      assert(Outs.size() == 0 && "Kernel must return void.");
      return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
    case CallingConv::PTX_Device:
      assert(Outs.size() <= 1 && "Can at most return one value.");
      break;
  }

  MachineFunction& MF = DAG.getMachineFunction();
  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
  PTXParamManager &PM = MFI->getParamManager();

  const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();

  if (ST.useParamSpaceForDeviceArgs()) {
    // Param-space return: store the (single) value into a return parameter.
    if (Outs.size() == 1) {
      unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
      unsigned Param = PM.addReturnParam(ParamSize);
      const std::string &ParamName = PM.getParamName(Param);
      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
                                                       MVT::Other);
      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
                          ParamValue, OutVals[0]);
    }
  } else {
    // Register return: copy each value into a fresh virtual register.
    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      EVT RegVT = Outs[i].VT;
      const TargetRegisterClass* TRC;
      unsigned RegType;

      // Determine which register class we need
      if (RegVT == MVT::i1) {
        TRC = &PTX::RegPredRegClass;
        RegType = PTXRegisterType::Pred;
      } else if (RegVT == MVT::i16) {
        TRC = &PTX::RegI16RegClass;
        RegType = PTXRegisterType::B16;
      } else if (RegVT == MVT::i32) {
        TRC = &PTX::RegI32RegClass;
        RegType = PTXRegisterType::B32;
      } else if (RegVT == MVT::i64) {
        TRC = &PTX::RegI64RegClass;
        RegType = PTXRegisterType::B64;
      } else if (RegVT == MVT::f32) {
        TRC = &PTX::RegF32RegClass;
        RegType = PTXRegisterType::F32;
      } else if (RegVT == MVT::f64) {
        TRC = &PTX::RegF64RegClass;
        RegType = PTXRegisterType::F64;
      } else {
        llvm_unreachable("Unknown parameter type");
      }

      unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);

      SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]);
      SDValue OutReg = DAG.getRegister(Reg, RegVT);

      // The copy itself is the chain operand of WRITE_PARAM.
      Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);

      MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
    }
  }

  // No glue value is ever produced above, so RET only takes the chain.
  return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
}
|
||||
|
||||
// Lower a call to a PTXISD::CALL node.
//
// The callee must be a direct call to a global function. It must use the
// PTX_Device calling convention unless it is named "printf" or "puts", which
// get special treatment (see below). CallConv, isVarArg, doesNotRet and
// isTailCall are not consulted or modified here.
//
// The operand list of the CALL node is laid out as
//   [Chain, #Ins, Ins..., Callee, #Outs, Outs...]
// where every In/Out is the external symbol of a local .param variable.
// Outgoing values are written with STORE_PARAM before the call; returned
// values are read with LOAD_PARAM after it and appended to InVals.
//
// printf/puts: exactly two outgoing parameters are passed. The first holds
// OutVals[0]; the second holds the address of a stack buffer into which the
// remaining OutVals are stored, one fixed-size slot each, where the slot size
// is the size of the widest of those values. When there are no remaining
// values the second parameter is declared but nothing is stored into it.
SDValue
PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                             CallingConv::ID CallConv, bool isVarArg,
                             bool doesNotRet, bool &isTailCall,
                             const SmallVectorImpl<ISD::OutputArg> &Outs,
                             const SmallVectorImpl<SDValue> &OutVals,
                             const SmallVectorImpl<ISD::InputArg> &Ins,
                             DebugLoc dl, SelectionDAG &DAG,
                             SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction& MF = DAG.getMachineFunction();
  PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
  PTXParamManager &PM = PTXMFI->getParamManager();
  MachineFrameInfo *MFI = MF.getFrameInfo();

  assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
         "Calls are not handled for the target device");

  // Identify the callee function
  const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
  const Function *function = cast<Function>(GV);

  // allow non-device calls only for printf (and puts)
  bool isPrintf = function->getName() == "printf" ||
                  function->getName() == "puts";

  assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
         "PTX function calls must be to PTX device functions");

  unsigned outSize = isPrintf ? 2 : Outs.size();

  std::vector<SDValue> Ops;
  // The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
  Ops.resize(outSize + Ins.size() + 4);

  // Callee
  Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
  Ops[Ins.size()+2] = Callee;

  // #Outs
  Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);

  if (isPrintf) {
    // first argument is the address of the global string variable in memory
    unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
    SDValue ParamValue0 =
      DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(), MVT::Other);
    Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
                        ParamValue0, OutVals[0]);
    Ops[Ins.size()+4] = ParamValue0;

    // alignment (in bits) is the maximum size of all the remaining arguments
    unsigned alignment = 0;
    for (unsigned i = 1; i < OutVals.size(); ++i) {
      alignment = std::max(alignment,
                           OutVals[i].getValueType().getSizeInBits());
    }

    // size (in bits) is the alignment multiplied by the number of arguments
    unsigned size = alignment * (OutVals.size() - 1);

    // second argument is the address of the stack object (unless no arguments)
    unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
    SDValue ParamValue1 =
      DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(), MVT::Other);
    Ops[Ins.size()+5] = ParamValue1;

    if (size > 0) {
      // Each argument occupies one slot of this many bytes. The stack object
      // below is sized as (number of arguments * slotBytes), so the store
      // offsets must use the same stride; a fixed 8-byte stride would write
      // past the end of the object whenever the widest argument is narrower
      // than 64 bits.
      const unsigned slotBytes = alignment / 8;

      // create a local stack object to store the arguments
      unsigned StackObject =
        MFI->CreateStackObject(size / 8, slotBytes, false);
      SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());

      // store each of the arguments to the stack in turn
      for (unsigned int i = 1; i != OutVals.size(); i++) {
        SDValue SlotOffset =
          DAG.getTargetConstant((i - 1) * slotBytes, getPointerTy());
        SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(),
                                        FrameIndex, SlotOffset);
        Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
                             MachinePointerInfo(),
                             false, false, 0);
      }

      // copy the address of the local frame index to get the address in
      // non-local space
      SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl,
                                        getPointerTy(), FrameIndex);

      // store this address in the second argument
      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
                          ParamValue1, genericAddr);
    }
  } else {
    // Generate STORE_PARAM nodes for each function argument.  In PTX, function
    // arguments are explicitly stored into .param variables and passed as
    // arguments. There is no register/stack-based calling convention in PTX.
    for (unsigned i = 0; i != OutVals.size(); ++i) {
      unsigned Size = OutVals[i].getValueType().getSizeInBits();
      unsigned Param = PM.addLocalParam(Size);
      const std::string &ParamName = PM.getParamName(Param);
      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
                                                       MVT::Other);
      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
                          ParamValue, OutVals[i]);
      Ops[i+Ins.size()+4] = ParamValue;
    }
  }

  std::vector<SDValue> InParams;

  // Generate list of .param variables to hold the return value(s).
  Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
  for (unsigned i = 0; i < Ins.size(); ++i) {
    unsigned Size = Ins[i].VT.getStoreSizeInBits();
    unsigned Param = PM.addLocalParam(Size);
    const std::string &ParamName = PM.getParamName(Param);
    SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
                                                     MVT::Other);
    Ops[i+2] = ParamValue;
    InParams.push_back(ParamValue);
  }

  // The chain operand is filled in last so that it includes every
  // STORE_PARAM / store emitted above.
  Ops[0] = Chain;

  // Create the CALL node.
  Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());

  // Create the LOAD_PARAM nodes that retrieve the function return value(s).
  for (unsigned i = 0; i < Ins.size(); ++i) {
    SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
                               InParams[i]);
    InVals.push_back(Load);
  }

  return Chain;
}
|
||||
|
||||
// Every argument occupies exactly one "register", whatever its type; neither
// parameter influences the result.
unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
  (void)Context;
  (void)VT;
  const unsigned RegistersPerValue = 1;
  return RegistersPerValue;
}
|
||||
|
@ -1,82 +0,0 @@
|
||||
//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the interfaces that PTX uses to lower LLVM code into a
|
||||
// selection DAG.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_ISEL_LOWERING_H
|
||||
#define PTX_ISEL_LOWERING_H
|
||||
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
namespace PTXISD {
|
||||
enum NodeType {
|
||||
FIRST_NUMBER = ISD::BUILTIN_OP_END,
|
||||
LOAD_PARAM,
|
||||
STORE_PARAM,
|
||||
READ_PARAM,
|
||||
WRITE_PARAM,
|
||||
EXIT,
|
||||
RET,
|
||||
COPY_ADDRESS,
|
||||
CALL
|
||||
};
|
||||
} // namespace PTXISD
|
||||
|
||||
class PTXTargetLowering : public TargetLowering {
|
||||
public:
|
||||
explicit PTXTargetLowering(TargetMachine &TM);
|
||||
|
||||
virtual const char *getTargetNodeName(unsigned Opcode) const;
|
||||
|
||||
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerFormalArguments(SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc dl,
|
||||
SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerReturn(SDValue Chain,
|
||||
CallingConv::ID CallConv,
|
||||
bool isVarArg,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
DebugLoc dl,
|
||||
SelectionDAG &DAG) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
|
||||
bool isVarArg, bool doesNotRet, bool &isTailCall,
|
||||
const SmallVectorImpl<ISD::OutputArg> &Outs,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
DebugLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual EVT getSetCCResultType(EVT VT) const;
|
||||
|
||||
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
|
||||
|
||||
private:
|
||||
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
|
||||
}; // class PTXTargetLowering
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_ISEL_LOWERING_H
|
@ -1,51 +0,0 @@
|
||||
//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// Rounding Mode Specifier
|
||||
/*class RoundingMode<bits<3> val> {
|
||||
bits<3> Value = val;
|
||||
}
|
||||
|
||||
def RndDefault : RoundingMode<0>;
|
||||
def RndNearestEven : RoundingMode<1>;
|
||||
def RndNearestZero : RoundingMode<2>;
|
||||
def RndNegInf : RoundingMode<3>;
|
||||
def RndPosInf : RoundingMode<4>;
|
||||
def RndApprox : RoundingMode<5>;*/
|
||||
|
||||
|
||||
// Rounding-mode operand: an i32 immediate printed by the instruction
// printer's "printRoundingMode" method.
let PrintMethod = "printRoundingMode" in
def RndMode : Operand<i32>;

// Pattern leaf matching the i32 constant 0, used as the default rounding
// mode.
def RndDefault : PatLeaf<(i32 0)>;
|
||||
|
||||
// PTX predicate operand: a predicate register plus an i32 immediate.
// The default value is (zero_reg, 2).
// NOTE(review): the immediate 2 presumably corresponds to the "no predicate"
// enumerator of PTXPredicate -- confirm against its definition.
// PrintMethod is left empty; predicate printing is defined elsewhere.
def pred : PredicateOperand<OtherVT,
                            (ops RegPred, i32imm),
                            (ops (i1 zero_reg), (i32 2))>;
|
||||
|
||||
// Rounding-mode operand wrapper whose machine-instruction form is a single
// i32 immediate.
let MIOperandInfo = (ops i32imm) in
def RndModeOperand : Operand<OtherVT>;
|
||||
|
||||
// Instruction Types

// Base class of every PTX instruction. The predicate operand `$_p` is
// appended to the given input operands and the instruction is marked
// predicable.
class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
  : Instruction {
  let Namespace = "PTX";

  dag OutOperandList = oops;
  dag InOperandList  = !con(iops, (ins pred:$_p));

  // Predicate printing is defined elsewhere.
  let AsmString     = asmstr;
  let Pattern       = pattern;
  let isPredicable  = 1;
}
|
@ -1,359 +0,0 @@
|
||||
//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the PTX implementation of the TargetInstrInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-instrinfo"
|
||||
|
||||
#include "PTXInstrInfo.h"
|
||||
#include "PTX.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/SelectionDAGNodes.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#define GET_INSTRINFO_CTOR
|
||||
#include "PTXGenInstrInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Construct the instruction info for the given target machine; the register
// info member is initialised with the same machine and this object.
PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
  : PTXGenInstrInfo(),
    RI(_TM, *this),
    TM(_TM) {
}
|
||||
|
||||
static const struct map_entry {
|
||||
const TargetRegisterClass *cls;
|
||||
const int opcode;
|
||||
} map[] = {
|
||||
{ &PTX::RegI16RegClass, PTX::MOVU16rr },
|
||||
{ &PTX::RegI32RegClass, PTX::MOVU32rr },
|
||||
{ &PTX::RegI64RegClass, PTX::MOVU64rr },
|
||||
{ &PTX::RegF32RegClass, PTX::MOVF32rr },
|
||||
{ &PTX::RegF64RegClass, PTX::MOVF64rr },
|
||||
{ &PTX::RegPredRegClass, PTX::MOVPREDrr }
|
||||
};
|
||||
|
||||
// Emit a register-to-register copy before I in MBB. The move opcode is chosen
// from the register class of DstReg; a default predicate is attached to the
// new instruction. A destination class without a move opcode is a fatal
// error.
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, DebugLoc DL,
                               unsigned DstReg, unsigned SrcReg,
                               bool KillSrc) const {
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const TargetRegisterClass *DstRC = MRI.getRegClass(DstReg);

  const map_entry *const End = map + sizeof(map) / sizeof(map[0]);
  for (const map_entry *Entry = map; Entry != End; ++Entry) {
    if (Entry->cls != DstRC)
      continue;

    const MCInstrDesc &Desc = get(Entry->opcode);
    MachineInstr *Copy = BuildMI(MBB, I, DL, Desc, DstReg).
      addReg(SrcReg, getKillRegState(KillSrc));
    AddDefaultPredicate(Copy);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}
|
||||
|
||||
// Emit a register-to-register copy before I in MBB when source and
// destination share a register class that has a move opcode. Returns true if
// a copy (with default predicate) was emitted, false otherwise.
bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                unsigned DstReg, unsigned SrcReg,
                                const TargetRegisterClass *DstRC,
                                const TargetRegisterClass *SrcRC,
                                DebugLoc DL) const {
  // Cross-class copies are not supported.
  if (DstRC != SrcRC)
    return false;

  const map_entry *const End = map + sizeof(map) / sizeof(map[0]);
  for (const map_entry *Entry = map; Entry != End; ++Entry) {
    if (Entry->cls != DstRC)
      continue;

    const MCInstrDesc &Desc = get(Entry->opcode);
    MachineInstr *Copy = BuildMI(MBB, I, DL, Desc, DstReg).addReg(SrcReg);
    AddDefaultPredicate(Copy);
    return true;
  }

  return false;
}
|
||||
|
||||
bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case PTX::MOVU16rr:
|
||||
case PTX::MOVU32rr:
|
||||
case PTX::MOVU64rr:
|
||||
case PTX::MOVF32rr:
|
||||
case PTX::MOVF64rr:
|
||||
case PTX::MOVPREDrr:
|
||||
assert(MI.getNumOperands() >= 2 &&
|
||||
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
|
||||
"Invalid register-register move instruction");
|
||||
SrcSubIdx = DstSubIdx = 0; // No sub-registers
|
||||
DstReg = MI.getOperand(0).getReg();
|
||||
SrcReg = MI.getOperand(1).getReg();
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// predicate support
|
||||
|
||||
bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
|
||||
int i = MI->findFirstPredOperandIdx();
|
||||
return i != -1 && MI->getOperand(i).getReg() != PTX::NoRegister;
|
||||
}
|
||||
|
||||
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
|
||||
return !isPredicated(MI) && MI->isTerminator();
|
||||
}
|
||||
|
||||
bool PTXInstrInfo::
|
||||
PredicateInstruction(MachineInstr *MI,
|
||||
const SmallVectorImpl<MachineOperand> &Pred) const {
|
||||
if (Pred.size() < 2)
|
||||
llvm_unreachable("lesser than 2 predicate operands are provided");
|
||||
|
||||
int i = MI->findFirstPredOperandIdx();
|
||||
if (i == -1)
|
||||
llvm_unreachable("missing predicate operand");
|
||||
|
||||
MI->getOperand(i).setReg(Pred[0].getReg());
|
||||
MI->getOperand(i+1).setImm(Pred[1].getImm());
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PTXInstrInfo::
|
||||
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2) const {
|
||||
const MachineOperand &PredReg1 = Pred1[0];
|
||||
const MachineOperand &PredReg2 = Pred2[0];
|
||||
if (PredReg1.getReg() != PredReg2.getReg())
|
||||
return false;
|
||||
|
||||
const MachineOperand &PredOp1 = Pred1[1];
|
||||
const MachineOperand &PredOp2 = Pred2[1];
|
||||
if (PredOp1.getImm() != PredOp2.getImm())
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool PTXInstrInfo::
|
||||
DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const {
|
||||
// If an instruction sets a predicate register, it defines a predicate.
|
||||
|
||||
// TODO supprot 5-operand format of setp instruction
|
||||
|
||||
if (MI->getNumOperands() < 1)
|
||||
return false;
|
||||
|
||||
const MachineOperand &MO = MI->getOperand(0);
|
||||
|
||||
if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
|
||||
return false;
|
||||
|
||||
Pred.push_back(MO);
|
||||
Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
|
||||
return true;
|
||||
}
|
||||
|
||||
// branch support
|
||||
|
||||
bool PTXInstrInfo::
|
||||
AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const {
|
||||
// TODO implement cases when AllowModify is true
|
||||
|
||||
if (MBB.empty())
|
||||
return true;
|
||||
|
||||
MachineBasicBlock::iterator iter = MBB.end();
|
||||
const MachineInstr& instLast1 = *--iter;
|
||||
// for special case that MBB has only 1 instruction
|
||||
const bool IsSizeOne = MBB.size() == 1;
|
||||
// if IsSizeOne is true, *--iter and instLast2 are invalid
|
||||
// we put a dummy value in instLast2 and desc2 since they are used
|
||||
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
|
||||
|
||||
DEBUG(dbgs() << "\n");
|
||||
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
|
||||
DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
|
||||
DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
|
||||
DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
|
||||
|
||||
// this block ends with no branches
|
||||
if (!IsAnyKindOfBranch(instLast1)) {
|
||||
DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
|
||||
return false;
|
||||
}
|
||||
|
||||
// this block ends with only an unconditional branch
|
||||
if (instLast1.isUnconditionalBranch() &&
|
||||
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
|
||||
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
|
||||
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
|
||||
TBB = GetBranchTarget(instLast1);
|
||||
return false;
|
||||
}
|
||||
|
||||
// this block ends with a conditional branch and
|
||||
// it falls through to a successor block
|
||||
if (instLast1.isConditionalBranch() &&
|
||||
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
|
||||
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
|
||||
TBB = GetBranchTarget(instLast1);
|
||||
int i = instLast1.findFirstPredOperandIdx();
|
||||
Cond.push_back(instLast1.getOperand(i));
|
||||
Cond.push_back(instLast1.getOperand(i+1));
|
||||
return false;
|
||||
}
|
||||
|
||||
// when IsSizeOne is true, we are done
|
||||
if (IsSizeOne)
|
||||
return true;
|
||||
|
||||
// this block ends with a conditional branch
|
||||
// followed by an unconditional branch
|
||||
if (instLast2.isConditionalBranch() &&
|
||||
instLast1.isUnconditionalBranch()) {
|
||||
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
|
||||
TBB = GetBranchTarget(instLast2);
|
||||
FBB = GetBranchTarget(instLast1);
|
||||
int i = instLast2.findFirstPredOperandIdx();
|
||||
Cond.push_back(instLast2.getOperand(i));
|
||||
Cond.push_back(instLast2.getOperand(i+1));
|
||||
return false;
|
||||
}
|
||||
|
||||
// branch cannot be understood
|
||||
DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Pop instructions off the end of MBB for as long as the last one is any
/// kind of branch, and return how many were removed.
unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  unsigned NumRemoved = 0;
  for (; !MBB.empty() && IsAnyKindOfBranch(MBB.back()); ++NumRemoved)
    MBB.pop_back();

  DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
  DEBUG(dbgs() << "RemoveBranch: remove " << NumRemoved << " branch inst\n");
  return NumRemoved;
}
|
||||
|
||||
unsigned PTXInstrInfo::
|
||||
InsertBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const {
|
||||
DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
|
||||
DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
|
||||
<< "\n";
|
||||
else dbgs() << "InsertBranch: TBB: (NULL)\n");
|
||||
DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
|
||||
<< "\n";
|
||||
else dbgs() << "InsertBranch: FBB: (NULL)\n");
|
||||
DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
|
||||
|
||||
assert(TBB && "TBB is NULL");
|
||||
|
||||
if (FBB) {
|
||||
BuildMI(&MBB, DL, get(PTX::BRAdp))
|
||||
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
|
||||
BuildMI(&MBB, DL, get(PTX::BRAd))
|
||||
.addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
|
||||
return 2;
|
||||
} else if (Cond.size()) {
|
||||
BuildMI(&MBB, DL, get(PTX::BRAdp))
|
||||
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
|
||||
return 1;
|
||||
} else {
|
||||
BuildMI(&MBB, DL, get(PTX::BRAd))
|
||||
.addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
||||
// Memory operand folding for spills
|
||||
/// Spilling to a stack slot is never expected for PTX; reaching this hook is
/// reported as unreachable. All parameters are ignored.
void PTXInstrInfo::storeRegToStackSlot(
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MII,
    unsigned SrcReg, bool isKill, int FrameIdx,
    const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  llvm_unreachable("storeRegToStackSlot should not be called for PTX");
}
|
||||
|
||||
/// Reloading from a stack slot is never expected for PTX; reaching this hook
/// is reported as unreachable. All parameters are ignored.
void PTXInstrInfo::loadRegFromStackSlot(
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MII,
    unsigned DestReg, int FrameIdx,
    const TargetRegisterClass *RC,
    const TargetRegisterInfo *TRI) const {
  llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
}
|
||||
|
||||
// static helper routines
|
||||
|
||||
/// Build a machine node for a one-operand instruction, appending the default
/// predicate pair (PTX::NoRegister register, PTXPredicate::None constant)
/// after the real operand.
MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
                  DebugLoc dl, EVT VT, SDValue Op1) {
  SDValue NoPredReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue NoPredKind = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);

  SDValue Operands[] = { Op1, NoPredReg, NoPredKind };
  return DAG->getMachineNode(Opcode, dl, VT, Operands,
                             array_lengthof(Operands));
}
|
||||
|
||||
/// Build a machine node for a two-operand instruction, appending the default
/// predicate pair (PTX::NoRegister register, PTXPredicate::None constant)
/// after the real operands.
MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
                  DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
  SDValue NoPredReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue NoPredKind = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);

  SDValue Operands[] = { Op1, Op2, NoPredReg, NoPredKind };
  return DAG->getMachineNode(Opcode, dl, VT, Operands,
                             array_lengthof(Operands));
}
|
||||
|
||||
/// Append the default predicate operands (PTX::NoRegister register followed
/// by a PTXPredicate::None immediate) to MI, unless MI already has a
/// predicate operand.
void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
  if (MI->findFirstPredOperandIdx() != -1)
    return;

  MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
  MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
}
|
||||
|
||||
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
|
||||
return inst.isTerminator() || inst.isBranch() || inst.isIndirectBranch();
|
||||
}
|
||||
|
||||
bool PTXInstrInfo::
|
||||
IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
|
||||
for (MachineBasicBlock::const_succ_iterator
|
||||
i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
|
||||
if (MBB.isLayoutSuccessor((const MachineBasicBlock*) &*i))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return the basic block stored in operand 0 of a branch instruction.
MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
  // FIXME So far all branch instructions put destination in 1st operand
  const MachineOperand &Dest = inst.getOperand(0);
  assert(Dest.isMBB() && "FIXME: detect branch target operand");
  return Dest.getMBB();
}
|
@ -1,133 +0,0 @@
|
||||
//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the PTX implementation of the TargetInstrInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_INSTR_INFO_H
|
||||
#define PTX_INSTR_INFO_H
|
||||
|
||||
#include "PTXRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
#define GET_INSTRINFO_HEADER
|
||||
#include "PTXGenInstrInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
class PTXTargetMachine;
|
||||
|
||||
class MachineSDNode;
|
||||
class SDValue;
|
||||
class SelectionDAG;
|
||||
|
||||
class PTXInstrInfo : public PTXGenInstrInfo {
|
||||
private:
|
||||
const PTXRegisterInfo RI;
|
||||
PTXTargetMachine &TM;
|
||||
|
||||
public:
|
||||
explicit PTXInstrInfo(PTXTargetMachine &_TM);
|
||||
|
||||
virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
|
||||
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DstReg, unsigned SrcReg,
|
||||
bool KillSrc) const;
|
||||
|
||||
virtual bool copyRegToReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I,
|
||||
unsigned DstReg, unsigned SrcReg,
|
||||
const TargetRegisterClass *DstRC,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
DebugLoc DL) const;
|
||||
|
||||
virtual bool isMoveInstr(const MachineInstr& MI,
|
||||
unsigned &SrcReg, unsigned &DstReg,
|
||||
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
|
||||
|
||||
// predicate support
|
||||
|
||||
virtual bool isPredicated(const MachineInstr *MI) const;
|
||||
|
||||
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
|
||||
|
||||
virtual
|
||||
bool PredicateInstruction(MachineInstr *MI,
|
||||
const SmallVectorImpl<MachineOperand> &Pred) const;
|
||||
|
||||
virtual
|
||||
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
|
||||
const SmallVectorImpl<MachineOperand> &Pred2) const;
|
||||
|
||||
virtual bool DefinesPredicate(MachineInstr *MI,
|
||||
std::vector<MachineOperand> &Pred) const;
|
||||
|
||||
// PTX is fully-predicable
|
||||
virtual bool isPredicable(MachineInstr *MI) const { return true; }
|
||||
|
||||
// branch support
|
||||
|
||||
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify = false) const;
|
||||
|
||||
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
||||
|
||||
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const;
|
||||
|
||||
// Memory operand folding for spills
|
||||
// TODO: Implement this eventually and get rid of storeRegToStackSlot and
|
||||
// loadRegFromStackSlot. Doing so will get rid of the "stack" registers
|
||||
// we currently use to spill, though I doubt the overall effect on ptxas
|
||||
// output will be large. I have yet to see a case where ptxas is unable
|
||||
// to see through the "stack" register usage and hence generates
|
||||
// efficient code anyway.
|
||||
// virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
|
||||
// MachineInstr* MI,
|
||||
// const SmallVectorImpl<unsigned> &Ops,
|
||||
// int FrameIndex) const;
|
||||
|
||||
virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
|
||||
MachineBasicBlock::iterator MII,
|
||||
unsigned SrcReg, bool isKill, int FrameIndex,
|
||||
const TargetRegisterClass* RC,
|
||||
const TargetRegisterInfo* TRI) const;
|
||||
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MII,
|
||||
unsigned DestReg, int FrameIdx,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
// static helper routines
|
||||
|
||||
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
|
||||
DebugLoc dl, EVT VT,
|
||||
SDValue Op1);
|
||||
|
||||
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
|
||||
DebugLoc dl, EVT VT,
|
||||
SDValue Op1, SDValue Op2);
|
||||
|
||||
static void AddDefaultPredicate(MachineInstr *MI);
|
||||
|
||||
static bool IsAnyKindOfBranch(const MachineInstr& inst);
|
||||
|
||||
static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
|
||||
|
||||
static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
|
||||
}; // class PTXInstrInfo
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_INSTR_INFO_H
|
File diff suppressed because it is too large
Load Diff
@ -1,278 +0,0 @@
|
||||
//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file describes the PTX load/store instructions in TableGen format.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
|
||||
// Addressing Predicates
|
||||
// We have to differentiate between 32- and 64-bit pointer types
|
||||
// Selected when the subtarget reports that it is not 64-bit.
def Use32BitAddresses
  : Predicate<"!getSubtarget().is64Bit()">;
// Selected when the subtarget reports that it is 64-bit.
def Use64BitAddresses
  : Predicate<"getSubtarget().is64Bit()">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Pattern Fragments for Loads/Stores
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Matches a load whose source value is a pointer in the
// PTXStateSpace::Global address space.
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
  if (!Src)
    return false;
  const PointerType *PT = dyn_cast<PointerType>(Src->getType());
  return PT && PT->getAddressSpace() == PTXStateSpace::Global;
}]>;
|
||||
|
||||
// Matches a load whose source value is a pointer in the
// PTXStateSpace::Constant address space.
def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
  if (!Src)
    return false;
  const PointerType *PT = dyn_cast<PointerType>(Src->getType());
  return PT && PT->getAddressSpace() == PTXStateSpace::Constant;
}]>;
|
||||
|
||||
// Matches a load whose source value is a pointer in the
// PTXStateSpace::Shared address space.
def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
  const Value *Src = cast<LoadSDNode>(N)->getSrcValue();
  if (!Src)
    return false;
  const PointerType *PT = dyn_cast<PointerType>(Src->getType());
  return PT && PT->getAddressSpace() == PTXStateSpace::Shared;
}]>;
|
||||
|
||||
// Matches a store whose source value is a pointer in the
// PTXStateSpace::Global address space.
def store_global
  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
  const Value *Src = cast<StoreSDNode>(N)->getSrcValue();
  if (!Src)
    return false;
  const PointerType *PT = dyn_cast<PointerType>(Src->getType());
  return PT && PT->getAddressSpace() == PTXStateSpace::Global;
}]>;
|
||||
|
||||
// Matches a store whose source value is a pointer in the
// PTXStateSpace::Shared address space.
def store_shared
  : PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
  const Value *Src = cast<StoreSDNode>(N)->getSrcValue();
  if (!Src)
    return false;
  const PointerType *PT = dyn_cast<PointerType>(Src->getType());
  return PT && PT->getAddressSpace() == PTXStateSpace::Shared;
}]>;
|
||||
|
||||
// Addressing modes.
|
||||
// Each addressing mode comes in a 32-bit and a 64-bit flavour; both flavours
// share the same selection routine and produce two operands.
def ADDRrr32    : ComplexPattern<i32, 2, "SelectADDRrr",    [], []>;
def ADDRrr64    : ComplexPattern<i64, 2, "SelectADDRrr",    [], []>;
def ADDRri32    : ComplexPattern<i32, 2, "SelectADDRri",    [], []>;
def ADDRri64    : ComplexPattern<i64, 2, "SelectADDRri",    [], []>;
def ADDRii32    : ComplexPattern<i32, 2, "SelectADDRii",    [], []>;
def ADDRii64    : ComplexPattern<i64, 2, "SelectADDRii",    [], []>;
def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
|
||||
|
||||
// Address operands
|
||||
// Two-component memory operands, all printed through printMemOperand.

// register + immediate
def MEMri32 : Operand<i32> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops RegI32, i32imm);
}
def MEMri64 : Operand<i64> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops RegI64, i64imm);
}

// local: immediate + immediate
def LOCALri32 : Operand<i32> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops i32imm, i32imm);
}
def LOCALri64 : Operand<i64> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops i64imm, i64imm);
}

// immediate + immediate
def MEMii32 : Operand<i32> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops i32imm, i32imm);
}
def MEMii64 : Operand<i64> {
  let PrintMethod   = "printMemOperand";
  let MIOperandInfo = (ops i64imm, i64imm);
}
|
||||
// The operand here does not correspond to an actual address, so we
// can use i32 in 64-bit address modes.
def MEMpi : Operand<i32> {
  let PrintMethod   = "printParamOperand";
  let MIOperandInfo = (ops i32imm);
}
def MEMret : Operand<i32> {
  let PrintMethod   = "printReturnOperand";
  let MIOperandInfo = (ops i32imm);
}
|
||||
|
||||
|
||||
// Load/store .param space
|
||||
// SelectionDAG nodes for the .param space. All four carry a chain, produce
// glue, and optionally consume glue.
def PTXloadparam
  : SDNode<"PTXISD::LOAD_PARAM",
           SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
def PTXstoreparam
  : SDNode<"PTXISD::STORE_PARAM",
           SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;

def PTXreadparam
  : SDNode<"PTXISD::READ_PARAM",
           SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
def PTXwriteparam
  : SDNode<"PTXISD::WRITE_PARAM",
           SDTypeProfile<0, 1, []>,
           [SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
|
||||
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Classes for loads/stores
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Load instructions for one register class RC.
//
// Expands to six variants (rr/ri/ii addressing x 32/64-bit pointers). The
// assembly string is opstr followed by typestr and "\t$d, [$a]". The 32-bit
// variants require Use32BitAddresses, the 64-bit ones Use64BitAddresses.
multiclass PTX_LD<string opstr, string typestr,
                  RegisterClass RC, PatFrag pat_load> {
  def rr32 : InstPTX<(outs RC:$d), (ins MEMri32:$a),
                     !strconcat(opstr, typestr, "\t$d, [$a]"),
                     [(set RC:$d, (pat_load ADDRrr32:$a))]>,
             Requires<[Use32BitAddresses]>;
  def rr64 : InstPTX<(outs RC:$d), (ins MEMri64:$a),
                     !strconcat(opstr, typestr, "\t$d, [$a]"),
                     [(set RC:$d, (pat_load ADDRrr64:$a))]>,
             Requires<[Use64BitAddresses]>;
  def ri32 : InstPTX<(outs RC:$d), (ins MEMri32:$a),
                     !strconcat(opstr, typestr, "\t$d, [$a]"),
                     [(set RC:$d, (pat_load ADDRri32:$a))]>,
             Requires<[Use32BitAddresses]>;
  def ri64 : InstPTX<(outs RC:$d), (ins MEMri64:$a),
                     !strconcat(opstr, typestr, "\t$d, [$a]"),
                     [(set RC:$d, (pat_load ADDRri64:$a))]>,
             Requires<[Use64BitAddresses]>;
  def ii32 : InstPTX<(outs RC:$d), (ins MEMii32:$a),
                     !strconcat(opstr, typestr, "\t$d, [$a]"),
                     [(set RC:$d, (pat_load ADDRii32:$a))]>,
             Requires<[Use32BitAddresses]>;
  def ii64 : InstPTX<(outs RC:$d), (ins MEMii64:$a),
                     !strconcat(opstr, typestr, "\t$d, [$a]"),
                     [(set RC:$d, (pat_load ADDRii64:$a))]>,
             Requires<[Use64BitAddresses]>;
}
|
||||
|
||||
// Store instructions for one register class RC.
//
// Expands to six variants (rr/ri/ii addressing x 32/64-bit pointers). The
// assembly string is opstr followed by typestr and "\t[$a], $d". The 32-bit
// variants require Use32BitAddresses, the 64-bit ones Use64BitAddresses.
multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
                  PatFrag pat_store> {
  def rr32 : InstPTX<(outs), (ins RC:$d, MEMri32:$a),
                     !strconcat(opstr, typestr, "\t[$a], $d"),
                     [(pat_store RC:$d, ADDRrr32:$a)]>,
             Requires<[Use32BitAddresses]>;
  def rr64 : InstPTX<(outs), (ins RC:$d, MEMri64:$a),
                     !strconcat(opstr, typestr, "\t[$a], $d"),
                     [(pat_store RC:$d, ADDRrr64:$a)]>,
             Requires<[Use64BitAddresses]>;
  def ri32 : InstPTX<(outs), (ins RC:$d, MEMri32:$a),
                     !strconcat(opstr, typestr, "\t[$a], $d"),
                     [(pat_store RC:$d, ADDRri32:$a)]>,
             Requires<[Use32BitAddresses]>;
  def ri64 : InstPTX<(outs), (ins RC:$d, MEMri64:$a),
                     !strconcat(opstr, typestr, "\t[$a], $d"),
                     [(pat_store RC:$d, ADDRri64:$a)]>,
             Requires<[Use64BitAddresses]>;
  def ii32 : InstPTX<(outs), (ins RC:$d, MEMii32:$a),
                     !strconcat(opstr, typestr, "\t[$a], $d"),
                     [(pat_store RC:$d, ADDRii32:$a)]>,
             Requires<[Use32BitAddresses]>;
  def ii64 : InstPTX<(outs), (ins RC:$d, MEMii64:$a),
                     !strconcat(opstr, typestr, "\t[$a], $d"),
                     [(pat_store RC:$d, ADDRii64:$a)]>,
             Requires<[Use64BitAddresses]>;
}
|
||||
|
||||
// ld.local / st.local instructions for one register class RC, in 32- and
// 64-bit address flavours.
// NOTE(review): the selection patterns go through load_global/store_global
// combined with the ADDRlocal addressing modes - confirm this is intended.
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
  def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
                       !strconcat("ld.local", typestr, "\t$d, [$a]"),
                       [(set RC:$d, (load_global ADDRlocal32:$a))]>;
  def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
                       !strconcat("ld.local", typestr, "\t$d, [$a]"),
                       [(set RC:$d, (load_global ADDRlocal64:$a))]>;
  def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
                       !strconcat("st.local", typestr, "\t[$a], $d"),
                       [(store_global RC:$d, ADDRlocal32:$a)]>;
  def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
                       !strconcat("st.local", typestr, "\t[$a], $d"),
                       [(store_global RC:$d, ADDRlocal64:$a)]>;
}
|
||||
|
||||
// ld.param / st.param instructions for one register class RC. Both are
// marked as having side effects.
multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
  let hasSideEffects = 1 in {
    def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
                       !strconcat("ld.param", typestr, "\t$d, [$a]"),
                       [(set RC:$d, (PTXloadparam texternalsym:$a))]>;
    def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
                       !strconcat("st.param", typestr, "\t[$d], $a"),
                       [(PTXstoreparam texternalsym:$d, RC:$a)]>;
  }
}
|
||||
|
||||
// Instantiate PTX_LD for each of the u16/u32/u64/f32/f64 register classes.
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
  // integer types
  defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
  defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
  defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
  // floating-point types
  defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
  defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
}
|
||||
|
||||
// Instantiate PTX_ST for each of the u16/u32/u64/f32/f64 register classes.
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
  // integer types
  defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
  defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
  defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
  // floating-point types
  defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
  defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
}
|
||||
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instruction definitions for loads/stores
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Stores: global and shared state spaces
defm STg : PTX_ST_ALL<"st.global", store_global>;
defm STs : PTX_ST_ALL<"st.shared", store_shared>;

// Loads: global, constant and shared state spaces
defm LDg : PTX_LD_ALL<"ld.global", load_global>;
defm LDc : PTX_LD_ALL<"ld.const",  load_constant>;
defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;

// Param loads/stores, one pair per register class
defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
defm PARAMU16  : PTX_PARAM_LD_ST<".u16",  RegI16>;
defm PARAMU32  : PTX_PARAM_LD_ST<".u32",  RegI32>;
defm PARAMU64  : PTX_PARAM_LD_ST<".u64",  RegI64>;
defm PARAMF32  : PTX_PARAM_LD_ST<".f32",  RegF32>;
defm PARAMF64  : PTX_PARAM_LD_ST<".f64",  RegF64>;

// Local loads/stores, one set per register class
defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
defm LOCALU16  : PTX_LOCAL_LD_ST<".u16",  RegI16>;
defm LOCALU32  : PTX_LOCAL_LD_ST<".u32",  RegI32>;
defm LOCALU64  : PTX_LOCAL_LD_ST<".u64",  RegI64>;
defm LOCALF32  : PTX_LOCAL_LD_ST<".f32",  RegF32>;
defm LOCALF64  : PTX_LOCAL_LD_ST<".f64",  RegF64>;
|
||||
|
@ -1,110 +0,0 @@
|
||||
//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines all of the PTX-specific intrinsic instructions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// PTX Special Purpose Register Accessor Intrinsics
|
||||
|
||||
// Reads special register %<regname> into a 64-bit register with mov.u64;
// selected for the intrinsic intop.
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
  : InstPTX<(outs RegI64:$d),
            (ins),
            !strconcat("mov.u64\t$d, %", regname),
            [(set RegI64:$d, (intop))]>;
|
||||
|
||||
// Reads special register %<regname> into a 32-bit register with mov.u32;
// selected for the intrinsic intop.
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
  : InstPTX<(outs RegI32:$d),
            (ins),
            !strconcat("mov.u32\t$d, %", regname),
            [(set RegI32:$d, (intop))]>;
|
||||
|
||||
// TODO Add read vector-version of special registers
|
||||
|
||||
// --- tid ---
//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
//                                                     int_ptx_read_tid_r64>;
def PTX_READ_TID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.x", int_ptx_read_tid_x>;
def PTX_READ_TID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.y", int_ptx_read_tid_y>;
def PTX_READ_TID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.z", int_ptx_read_tid_z>;
def PTX_READ_TID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"tid.w", int_ptx_read_tid_w>;

// --- ntid ---
//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
//                                                      int_ptx_read_ntid_r64>;
def PTX_READ_NTID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x", int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y", int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z", int_ptx_read_ntid_z>;
def PTX_READ_NTID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w", int_ptx_read_ntid_w>;

// --- laneid / warpid / nwarpid ---
def PTX_READ_LANEID
  : PTX_READ_SPECIAL_REGISTER_R32<"laneid", int_ptx_read_laneid>;
def PTX_READ_WARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"warpid", int_ptx_read_warpid>;
def PTX_READ_NWARPID
  : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid", int_ptx_read_nwarpid>;

// --- ctaid ---
//def PTX_READ_CTAID_R64 :
//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
def PTX_READ_CTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x", int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y", int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z", int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w", int_ptx_read_ctaid_w>;

// --- nctaid ---
//def PTX_READ_NCTAID_R64 :
//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
def PTX_READ_NCTAID_X
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x", int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y", int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z", int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W
  : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w", int_ptx_read_nctaid_w>;

// --- smid / nsmid / gridid ---
def PTX_READ_SMID
  : PTX_READ_SPECIAL_REGISTER_R32<"smid", int_ptx_read_smid>;
def PTX_READ_NSMID
  : PTX_READ_SPECIAL_REGISTER_R32<"nsmid", int_ptx_read_nsmid>;
def PTX_READ_GRIDID
  : PTX_READ_SPECIAL_REGISTER_R32<"gridid", int_ptx_read_gridid>;

// --- lanemask_* ---
def PTX_READ_LANEMASK_EQ
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
  : PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;

// --- clock / clock64 ---
def PTX_READ_CLOCK
  : PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
  : PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;

// --- pm0..pm3 ---
def PTX_READ_PM0
  : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1
  : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2
  : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3
  : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
|
||||
|
||||
// PTX Parallel Synchronization and Communication Intrinsics
|
||||
|
||||
// bar.sync with an immediate operand, selected for int_ptx_bar_sync.
def PTX_BAR_SYNC
  : InstPTX<(outs),
            (ins i32imm:$i),
            "bar.sync\t$i",
            [(int_ptx_bar_sync imm:$i)]>;
|
@ -1,556 +0,0 @@
|
||||
//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
#include "llvm/ADT/SmallString.h"
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCCodeEmitter.h"
|
||||
#include "llvm/MC/MCContext.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/MC/MCInstPrinter.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/MathExtras.h"
|
||||
#include "llvm/Support/Format.h"
|
||||
#include "llvm/Support/FormattedStream.h"
|
||||
#include "llvm/Support/PathV2.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class PTXMCAsmStreamer : public MCStreamer {
|
||||
formatted_raw_ostream &OS;
|
||||
const MCAsmInfo &MAI;
|
||||
OwningPtr<MCInstPrinter> InstPrinter;
|
||||
OwningPtr<MCCodeEmitter> Emitter;
|
||||
|
||||
SmallString<128> CommentToEmit;
|
||||
raw_svector_ostream CommentStream;
|
||||
|
||||
unsigned IsVerboseAsm : 1;
|
||||
unsigned ShowInst : 1;
|
||||
|
||||
public:
|
||||
PTXMCAsmStreamer(MCContext &Context,
|
||||
formatted_raw_ostream &os,
|
||||
bool isVerboseAsm, bool useLoc,
|
||||
MCInstPrinter *printer,
|
||||
MCCodeEmitter *emitter,
|
||||
bool showInst)
|
||||
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
|
||||
InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
|
||||
IsVerboseAsm(isVerboseAsm),
|
||||
ShowInst(showInst) {
|
||||
if (InstPrinter && IsVerboseAsm)
|
||||
InstPrinter->setCommentStream(CommentStream);
|
||||
}
|
||||
|
||||
~PTXMCAsmStreamer() {}
|
||||
|
||||
inline void EmitEOL() {
|
||||
// If we don't have any comments, just emit a \n.
|
||||
if (!IsVerboseAsm) {
|
||||
OS << '\n';
|
||||
return;
|
||||
}
|
||||
EmitCommentsAndEOL();
|
||||
}
|
||||
void EmitCommentsAndEOL();
|
||||
|
||||
/// isVerboseAsm - Return true if this streamer supports verbose assembly at
|
||||
/// all.
|
||||
virtual bool isVerboseAsm() const { return IsVerboseAsm; }
|
||||
|
||||
/// hasRawTextSupport - We support EmitRawText.
|
||||
virtual bool hasRawTextSupport() const { return true; }
|
||||
|
||||
/// AddComment - Add a comment that can be emitted to the generated .s
|
||||
/// file if applicable as a QoI issue to make the output of the compiler
|
||||
/// more readable. This only affects the MCAsmStreamer, and only when
|
||||
/// verbose assembly output is enabled.
|
||||
virtual void AddComment(const Twine &T);
|
||||
|
||||
/// AddEncodingComment - Add a comment showing the encoding of an instruction.
|
||||
virtual void AddEncodingComment(const MCInst &Inst);
|
||||
|
||||
/// GetCommentOS - Return a raw_ostream that comments can be written to.
|
||||
/// Unlike AddComment, you are required to terminate comments with \n if you
|
||||
/// use this method.
|
||||
virtual raw_ostream &GetCommentOS() {
|
||||
if (!IsVerboseAsm)
|
||||
return nulls(); // Discard comments unless in verbose asm mode.
|
||||
return CommentStream;
|
||||
}
|
||||
|
||||
/// AddBlankLine - Emit a blank line to a .s file to pretty it up.
|
||||
virtual void AddBlankLine() {
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
/// @name MCStreamer Interface
|
||||
/// @{
|
||||
|
||||
virtual void ChangeSection(const MCSection *Section);
|
||||
virtual void InitSections() { /* PTX does not use sections */ }
|
||||
|
||||
virtual void EmitLabel(MCSymbol *Symbol);
|
||||
|
||||
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
|
||||
|
||||
virtual void EmitThumbFunc(MCSymbol *Func);
|
||||
|
||||
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
|
||||
|
||||
virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
|
||||
|
||||
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
|
||||
const MCSymbol *LastLabel,
|
||||
const MCSymbol *Label,
|
||||
unsigned PointerSize);
|
||||
|
||||
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
|
||||
|
||||
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
|
||||
virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
|
||||
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
|
||||
virtual void EmitCOFFSymbolType(int Type);
|
||||
virtual void EndCOFFSymbolDef();
|
||||
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
|
||||
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
|
||||
unsigned ByteAlignment);
|
||||
|
||||
/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
|
||||
///
|
||||
/// @param Symbol - The common symbol to emit.
|
||||
/// @param Size - The size of the common symbol.
|
||||
/// @param ByteAlignment - The alignment of the common symbol in bytes.
|
||||
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
|
||||
unsigned ByteAlignment);
|
||||
|
||||
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
|
||||
unsigned Size = 0, unsigned ByteAlignment = 0);
|
||||
|
||||
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
|
||||
uint64_t Size, unsigned ByteAlignment = 0);
|
||||
|
||||
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
|
||||
|
||||
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
|
||||
unsigned AddrSpace);
|
||||
virtual void EmitULEB128Value(const MCExpr *Value);
|
||||
virtual void EmitSLEB128Value(const MCExpr *Value);
|
||||
virtual void EmitGPRel32Value(const MCExpr *Value);
|
||||
|
||||
|
||||
virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
|
||||
unsigned AddrSpace);
|
||||
|
||||
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
|
||||
unsigned ValueSize = 1,
|
||||
unsigned MaxBytesToEmit = 0);
|
||||
|
||||
virtual void EmitCodeAlignment(unsigned ByteAlignment,
|
||||
unsigned MaxBytesToEmit = 0);
|
||||
|
||||
virtual bool EmitValueToOffset(const MCExpr *Offset,
|
||||
unsigned char Value = 0);
|
||||
|
||||
virtual void EmitFileDirective(StringRef Filename);
|
||||
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
|
||||
StringRef Filename);
|
||||
|
||||
virtual void EmitInstruction(const MCInst &Inst);
|
||||
|
||||
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
|
||||
/// the specified string in the output .s file. This capability is
|
||||
/// indicated by the hasRawTextSupport() predicate.
|
||||
virtual void EmitRawText(StringRef String);
|
||||
|
||||
virtual void FinishImpl();
|
||||
|
||||
/// @}
|
||||
|
||||
}; // class PTXMCAsmStreamer
|
||||
|
||||
}
|
||||
|
||||
/// TODO: Add appropriate implementation of Emit*() methods when needed
|
||||
|
||||
void PTXMCAsmStreamer::AddComment(const Twine &T) {
|
||||
if (!IsVerboseAsm) return;
|
||||
|
||||
// Make sure that CommentStream is flushed.
|
||||
CommentStream.flush();
|
||||
|
||||
T.toVector(CommentToEmit);
|
||||
// Each comment goes on its own line.
|
||||
CommentToEmit.push_back('\n');
|
||||
|
||||
// Tell the comment stream that the vector changed underneath it.
|
||||
CommentStream.resync();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitCommentsAndEOL() {
|
||||
if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
|
||||
OS << '\n';
|
||||
return;
|
||||
}
|
||||
|
||||
CommentStream.flush();
|
||||
StringRef Comments = CommentToEmit.str();
|
||||
|
||||
assert(Comments.back() == '\n' &&
|
||||
"Comment array not newline terminated");
|
||||
do {
|
||||
// Emit a line of comments.
|
||||
OS.PadToColumn(MAI.getCommentColumn());
|
||||
size_t Position = Comments.find('\n');
|
||||
OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
|
||||
|
||||
Comments = Comments.substr(Position+1);
|
||||
} while (!Comments.empty());
|
||||
|
||||
CommentToEmit.clear();
|
||||
// Tell the comment stream that the vector changed underneath it.
|
||||
CommentStream.resync();
|
||||
}
|
||||
|
||||
/// truncateToSize - Keep only the low \p Bytes bytes of \p Value and clear all
/// higher bits.
///
/// \p Bytes must be in the range [1, 8]. The mask is an all-ones 64-bit word
/// shifted right by (64 - Bytes * 8) bits; for Bytes > 8 that unsigned
/// subtraction wraps around and the shift count becomes undefined, so the
/// assertion rejects it along with zero.
static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  assert(Bytes > 0 && Bytes <= 8 && "Invalid size!");
  return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
}
|
||||
|
||||
void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
|
||||
assert(Section && "Cannot switch to a null section!");
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
|
||||
assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
|
||||
assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
|
||||
assert(getCurrentSection() && "Cannot emit before setting section!");
|
||||
|
||||
OS << *Symbol << MAI.getLabelSuffix();
|
||||
EmitEOL();
|
||||
Symbol->setSection(*getCurrentSection());
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
|
||||
OS << *Symbol << " = " << *Value;
|
||||
EmitEOL();
|
||||
|
||||
// FIXME: Lift context changes into super class.
|
||||
Symbol->setVariableValue(Value);
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
|
||||
const MCSymbol *Symbol) {
|
||||
OS << ".weakref " << *Alias << ", " << *Symbol;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
|
||||
const MCSymbol *LastLabel,
|
||||
const MCSymbol *Label,
|
||||
unsigned PointerSize) {
|
||||
report_fatal_error("Unimplemented.");
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
|
||||
MCSymbolAttr Attribute) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
|
||||
|
||||
void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
|
||||
|
||||
void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
|
||||
unsigned ByteAlignment) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
|
||||
unsigned ByteAlignment) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
|
||||
unsigned Size, unsigned ByteAlignment) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
|
||||
MCSymbol *Symbol,
|
||||
uint64_t Size, unsigned ByteAlignment) {}
|
||||
|
||||
static inline char toOctal(int X) { return (X&7)+'0'; }
|
||||
|
||||
static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
|
||||
OS << '"';
|
||||
|
||||
for (unsigned i = 0, e = Data.size(); i != e; ++i) {
|
||||
unsigned char C = Data[i];
|
||||
if (C == '"' || C == '\\') {
|
||||
OS << '\\' << (char)C;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (isprint((unsigned char)C)) {
|
||||
OS << (char)C;
|
||||
continue;
|
||||
}
|
||||
|
||||
switch (C) {
|
||||
case '\b': OS << "\\b"; break;
|
||||
case '\f': OS << "\\f"; break;
|
||||
case '\n': OS << "\\n"; break;
|
||||
case '\r': OS << "\\r"; break;
|
||||
case '\t': OS << "\\t"; break;
|
||||
default:
|
||||
OS << '\\';
|
||||
OS << toOctal(C >> 6);
|
||||
OS << toOctal(C >> 3);
|
||||
OS << toOctal(C >> 0);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
OS << '"';
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
|
||||
assert(getCurrentSection() && "Cannot emit contents before setting section!");
|
||||
if (Data.empty()) return;
|
||||
|
||||
if (Data.size() == 1) {
|
||||
OS << MAI.getData8bitsDirective(AddrSpace);
|
||||
OS << (unsigned)(unsigned char)Data[0];
|
||||
EmitEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
// If the data ends with 0 and the target supports .asciz, use it, otherwise
|
||||
// use .ascii
|
||||
if (MAI.getAscizDirective() && Data.back() == 0) {
|
||||
OS << MAI.getAscizDirective();
|
||||
Data = Data.substr(0, Data.size()-1);
|
||||
} else {
|
||||
OS << MAI.getAsciiDirective();
|
||||
}
|
||||
|
||||
OS << ' ';
|
||||
PrintQuotedString(Data, OS);
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
|
||||
unsigned AddrSpace) {
|
||||
assert(getCurrentSection() && "Cannot emit contents before setting section!");
|
||||
const char *Directive = 0;
|
||||
switch (Size) {
|
||||
default: break;
|
||||
case 1: Directive = MAI.getData8bitsDirective(AddrSpace); break;
|
||||
case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
|
||||
case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
|
||||
case 8:
|
||||
Directive = MAI.getData64bitsDirective(AddrSpace);
|
||||
// If the target doesn't support 64-bit data, emit as two 32-bit halves.
|
||||
if (Directive) break;
|
||||
int64_t IntValue;
|
||||
if (!Value->EvaluateAsAbsolute(IntValue))
|
||||
report_fatal_error("Don't know how to emit this value.");
|
||||
if (getContext().getAsmInfo().isLittleEndian()) {
|
||||
EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
|
||||
EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
|
||||
} else {
|
||||
EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
|
||||
EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
assert(Directive && "Invalid size for machine code value!");
|
||||
OS << Directive << *Value;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
|
||||
assert(MAI.hasLEB128() && "Cannot print a .uleb");
|
||||
OS << ".uleb128 " << *Value;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
|
||||
assert(MAI.hasLEB128() && "Cannot print a .sleb");
|
||||
OS << ".sleb128 " << *Value;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
|
||||
assert(MAI.getGPRel32Directive() != 0);
|
||||
OS << MAI.getGPRel32Directive() << *Value;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
|
||||
/// EmitFill - Emit NumBytes bytes worth of the value specified by
|
||||
/// FillValue. This implements directives such as '.space'.
|
||||
void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
|
||||
unsigned AddrSpace) {
|
||||
if (NumBytes == 0) return;
|
||||
|
||||
if (AddrSpace == 0)
|
||||
if (const char *ZeroDirective = MAI.getZeroDirective()) {
|
||||
OS << ZeroDirective << NumBytes;
|
||||
if (FillValue != 0)
|
||||
OS << ',' << (int)FillValue;
|
||||
EmitEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
// Emit a byte at a time.
|
||||
MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
|
||||
int64_t Value,
|
||||
unsigned ValueSize,
|
||||
unsigned MaxBytesToEmit) {
|
||||
// Some assemblers don't support non-power of two alignments, so we always
|
||||
// emit alignments as a power of two if possible.
|
||||
if (isPowerOf2_32(ByteAlignment)) {
|
||||
switch (ValueSize) {
|
||||
default: llvm_unreachable("Invalid size for machine code value!");
|
||||
case 1: OS << MAI.getAlignDirective(); break;
|
||||
// FIXME: use MAI for this!
|
||||
case 2: OS << ".p2alignw "; break;
|
||||
case 4: OS << ".p2alignl "; break;
|
||||
case 8: llvm_unreachable("Unsupported alignment size!");
|
||||
}
|
||||
|
||||
if (MAI.getAlignmentIsInBytes())
|
||||
OS << ByteAlignment;
|
||||
else
|
||||
OS << Log2_32(ByteAlignment);
|
||||
|
||||
if (Value || MaxBytesToEmit) {
|
||||
OS << ", 0x";
|
||||
OS.write_hex(truncateToSize(Value, ValueSize));
|
||||
|
||||
if (MaxBytesToEmit)
|
||||
OS << ", " << MaxBytesToEmit;
|
||||
}
|
||||
EmitEOL();
|
||||
return;
|
||||
}
|
||||
|
||||
// Non-power of two alignment. This is not widely supported by assemblers.
|
||||
// FIXME: Parameterize this based on MAI.
|
||||
switch (ValueSize) {
|
||||
default: llvm_unreachable("Invalid size for machine code value!");
|
||||
case 1: OS << ".balign"; break;
|
||||
case 2: OS << ".balignw"; break;
|
||||
case 4: OS << ".balignl"; break;
|
||||
case 8: llvm_unreachable("Unsupported alignment size!");
|
||||
}
|
||||
|
||||
OS << ' ' << ByteAlignment;
|
||||
OS << ", " << truncateToSize(Value, ValueSize);
|
||||
if (MaxBytesToEmit)
|
||||
OS << ", " << MaxBytesToEmit;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
|
||||
unsigned MaxBytesToEmit) {}
|
||||
|
||||
bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
|
||||
unsigned char Value) {return false;}
|
||||
|
||||
|
||||
void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
|
||||
assert(MAI.hasSingleParameterDotFile());
|
||||
OS << "\t.file\t";
|
||||
PrintQuotedString(Filename, OS);
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
// FIXME: should we inherit from MCAsmStreamer?
|
||||
bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
|
||||
StringRef Directory,
|
||||
StringRef Filename) {
|
||||
if (!Directory.empty()) {
|
||||
if (sys::path::is_absolute(Filename))
|
||||
return EmitDwarfFileDirective(FileNo, "", Filename);
|
||||
SmallString<128> FullPathName = Directory;
|
||||
sys::path::append(FullPathName, Filename);
|
||||
return EmitDwarfFileDirective(FileNo, "", FullPathName);
|
||||
}
|
||||
|
||||
OS << "\t.file\t" << FileNo << ' ';
|
||||
PrintQuotedString(Filename, OS);
|
||||
EmitEOL();
|
||||
return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
|
||||
|
||||
void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
|
||||
assert(getCurrentSection() && "Cannot emit contents before setting section!");
|
||||
|
||||
// Show the encoding in a comment if we have a code emitter.
|
||||
if (Emitter)
|
||||
AddEncodingComment(Inst);
|
||||
|
||||
// Show the MCInst if enabled.
|
||||
if (ShowInst) {
|
||||
Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
|
||||
GetCommentOS() << "\n";
|
||||
}
|
||||
|
||||
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
|
||||
if (InstPrinter)
|
||||
InstPrinter->printInst(&Inst, OS, "");
|
||||
else
|
||||
Inst.print(OS, &MAI);
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
|
||||
/// the specified string in the output .s file. This capability is
|
||||
/// indicated by the hasRawTextSupport() predicate.
|
||||
void PTXMCAsmStreamer::EmitRawText(StringRef String) {
|
||||
if (!String.empty() && String.back() == '\n')
|
||||
String = String.substr(0, String.size()-1);
|
||||
OS << String;
|
||||
EmitEOL();
|
||||
}
|
||||
|
||||
void PTXMCAsmStreamer::FinishImpl() {}
|
||||
|
||||
namespace llvm {
|
||||
MCStreamer *createPTXAsmStreamer(MCContext &Context,
|
||||
formatted_raw_ostream &OS,
|
||||
bool isVerboseAsm, bool useLoc, bool useCFI,
|
||||
bool useDwarfDirectory,
|
||||
MCInstPrinter *IP,
|
||||
MCCodeEmitter *CE, MCAsmBackend *MAB,
|
||||
bool ShowInst) {
|
||||
return new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
|
||||
IP, CE, ShowInst);
|
||||
}
|
||||
}
|
@ -1,32 +0,0 @@
|
||||
//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains code to lower PTX MachineInstrs to their corresponding
|
||||
// MCInst records.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXAsmPrinter.h"
|
||||
#include "llvm/Constants.h"
|
||||
#include "llvm/CodeGen/MachineBasicBlock.h"
|
||||
#include "llvm/MC/MCExpr.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Target/Mangler.h"
|
||||
|
||||
void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
|
||||
PTXAsmPrinter &AP) {
|
||||
OutMI.setOpcode(MI->getOpcode());
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
const MachineOperand &MO = MI->getOperand(i);
|
||||
MCOperand MCOp;
|
||||
OutMI.addOperand(AP.lowerOperand(MO));
|
||||
}
|
||||
}
|
||||
|
@ -1,85 +0,0 @@
|
||||
//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines an information extractor for PTX machine functions.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-mf-info-extract"
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "PTXMachineFunctionInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// NOTE: PTXMFInfoExtract must run after register allocation!
|
||||
|
||||
namespace {
|
||||
/// PTXMFInfoExtract - PTX-specific pass that extracts PTX machine
|
||||
/// function information for PTXAsmPrinter
|
||||
///
|
||||
class PTXMFInfoExtract : public MachineFunctionPass {
|
||||
private:
|
||||
static char ID;
|
||||
|
||||
public:
|
||||
PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
|
||||
: MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
virtual const char *getPassName() const {
|
||||
return "PTX Machine Function Info Extractor";
|
||||
}
|
||||
}; // class PTXMFInfoExtract
|
||||
} // end anonymous namespace
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
char PTXMFInfoExtract::ID = 0;
|
||||
|
||||
/// runOnMachineFunction - Register every virtual register of \p MF with the
/// function's PTXMachineFunctionInfo, tagged with the PTXRegisterType that
/// corresponds to its register class and with the PTXRegisterSpace::Reg space.
/// Always returns false.
bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Generate list of all virtual registers used in this function.  The
  // register count is read once: the loop body only queries MRI.
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);

    // Map the register class onto the PTX register type used for naming.
    unsigned RegType;
    if (TRC == &PTX::RegPredRegClass)
      RegType = PTXRegisterType::Pred;
    else if (TRC == &PTX::RegI16RegClass)
      RegType = PTXRegisterType::B16;
    else if (TRC == &PTX::RegI32RegClass)
      RegType = PTXRegisterType::B32;
    else if (TRC == &PTX::RegI64RegClass)
      RegType = PTXRegisterType::B64;
    else if (TRC == &PTX::RegF32RegClass)
      RegType = PTXRegisterType::F32;
    else if (TRC == &PTX::RegF64RegClass)
      RegType = PTXRegisterType::F64;
    else
      llvm_unreachable("Unknown register class.");

    MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
  }

  return false;
}
|
||||
|
||||
FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
|
||||
CodeGenOpt::Level OptLevel) {
|
||||
return new PTXMFInfoExtract(TM, OptLevel);
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
//===-- PTXMachineFunctionInfo.cpp - PTX machine function info ------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXMachineFunctionInfo.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void PTXMachineFunctionInfo::anchor() { }
|
@ -1,202 +0,0 @@
|
||||
//===-- PTXMachineFunctionInfo.h - PTX machine function info -----*- C++ -*-==//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares PTX-specific per-machine-function information.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_MACHINE_FUNCTION_INFO_H
|
||||
#define PTX_MACHINE_FUNCTION_INFO_H
|
||||
|
||||
#include "PTX.h"
|
||||
#include "PTXParamManager.h"
|
||||
#include "PTXRegisterInfo.h"
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/DenseSet.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// PTXMachineFunctionInfo - This class is derived from MachineFunctionInfo and
|
||||
/// contains private PTX target-specific information for each MachineFunction.
|
||||
///
|
||||
class PTXMachineFunctionInfo : public MachineFunctionInfo {
|
||||
virtual void anchor();
|
||||
bool IsKernel;
|
||||
DenseSet<unsigned> RegArgs;
|
||||
DenseSet<unsigned> RegRets;
|
||||
|
||||
typedef DenseMap<int, std::string> FrameMap;
|
||||
|
||||
FrameMap FrameSymbols;
|
||||
|
||||
struct RegisterInfo {
|
||||
unsigned Reg;
|
||||
unsigned Type;
|
||||
unsigned Space;
|
||||
unsigned Offset;
|
||||
unsigned Encoded;
|
||||
};
|
||||
|
||||
typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;
|
||||
|
||||
RegisterInfoMap RegInfo;
|
||||
|
||||
PTXParamManager ParamManager;
|
||||
|
||||
public:
|
||||
typedef DenseSet<unsigned>::const_iterator reg_iterator;
|
||||
|
||||
PTXMachineFunctionInfo(MachineFunction &MF)
|
||||
: IsKernel(false) {
|
||||
}
|
||||
|
||||
/// getParamManager - Returns the PTXParamManager instance for this function.
|
||||
PTXParamManager& getParamManager() { return ParamManager; }
|
||||
const PTXParamManager& getParamManager() const { return ParamManager; }
|
||||
|
||||
/// setKernel/isKernel - Gets/sets a flag that indicates if this function is
|
||||
/// a PTX kernel function.
|
||||
void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
|
||||
bool isKernel() const { return IsKernel; }
|
||||
|
||||
/// argreg_begin/argreg_end - Returns iterators to the set of registers
|
||||
/// containing function arguments.
|
||||
reg_iterator argreg_begin() const { return RegArgs.begin(); }
|
||||
reg_iterator argreg_end() const { return RegArgs.end(); }
|
||||
|
||||
/// retreg_begin/retreg_end - Returns iterators to the set of registers
|
||||
/// containing the function return values.
|
||||
reg_iterator retreg_begin() const { return RegRets.begin(); }
|
||||
reg_iterator retreg_end() const { return RegRets.end(); }
|
||||
|
||||
/// addRegister - Adds a virtual register to the set of all used registers
|
||||
void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
|
||||
if (!RegInfo.count(Reg)) {
|
||||
RegisterInfo Info;
|
||||
Info.Reg = Reg;
|
||||
Info.Type = RegType;
|
||||
Info.Space = RegSpace;
|
||||
|
||||
// Determine register offset
|
||||
Info.Offset = 0;
|
||||
for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
|
||||
e = RegInfo.end(); i != e; ++i) {
|
||||
const RegisterInfo& RI = i->second;
|
||||
if (RI.Space == RegSpace)
|
||||
if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
|
||||
Info.Offset++;
|
||||
}
|
||||
|
||||
// Encode the register data into a single register number
|
||||
Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;
|
||||
|
||||
RegInfo[Reg] = Info;
|
||||
|
||||
if (RegSpace == PTXRegisterSpace::Argument)
|
||||
RegArgs.insert(Reg);
|
||||
else if (RegSpace == PTXRegisterSpace::Return)
|
||||
RegRets.insert(Reg);
|
||||
}
|
||||
}
|
||||
|
||||
/// countRegisters - Returns the number of registers of the given type and
|
||||
/// space.
|
||||
unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
|
||||
unsigned Count = 0;
|
||||
for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
|
||||
i != e; ++i) {
|
||||
const RegisterInfo& RI = i->second;
|
||||
if (RI.Type == RegType && RI.Space == RegSpace)
|
||||
Count++;
|
||||
}
|
||||
return Count;
|
||||
}
|
||||
|
||||
/// getEncodedRegister - Returns the encoded value of the register.
|
||||
unsigned getEncodedRegister(unsigned Reg) const {
|
||||
return RegInfo.lookup(Reg).Encoded;
|
||||
}
|
||||
|
||||
/// addRetReg - Adds a register to the set of return-value registers.
|
||||
void addRetReg(unsigned Reg) {
|
||||
if (!RegRets.count(Reg)) {
|
||||
RegRets.insert(Reg);
|
||||
}
|
||||
}
|
||||
|
||||
/// addArgReg - Adds a register to the set of function argument registers.
|
||||
void addArgReg(unsigned Reg) {
|
||||
RegArgs.insert(Reg);
|
||||
}
|
||||
|
||||
/// getRegisterName - Returns the name of the specified virtual register. This
|
||||
/// name is used during PTX emission.
|
||||
std::string getRegisterName(unsigned Reg) const {
|
||||
if (RegInfo.count(Reg)) {
|
||||
const RegisterInfo& RI = RegInfo.lookup(Reg);
|
||||
std::string Name;
|
||||
raw_string_ostream NameStr(Name);
|
||||
decodeRegisterName(NameStr, RI.Encoded);
|
||||
NameStr.flush();
|
||||
return Name;
|
||||
}
|
||||
else if (Reg == PTX::NoRegister)
|
||||
return "%noreg";
|
||||
else
|
||||
llvm_unreachable("Register not in register name map");
|
||||
}
|
||||
|
||||
/// getEncodedRegisterName - Returns the name of the encoded register.
|
||||
std::string getEncodedRegisterName(unsigned EncodedReg) const {
|
||||
std::string Name;
|
||||
raw_string_ostream NameStr(Name);
|
||||
decodeRegisterName(NameStr, EncodedReg);
|
||||
NameStr.flush();
|
||||
return Name;
|
||||
}
|
||||
|
||||
/// getRegisterType - Returns the type of the specified virtual register.
|
||||
unsigned getRegisterType(unsigned Reg) const {
|
||||
if (RegInfo.count(Reg))
|
||||
return RegInfo.lookup(Reg).Type;
|
||||
else
|
||||
llvm_unreachable("Unknown register");
|
||||
}
|
||||
|
||||
/// getOffsetForRegister - Returns the offset of the virtual register
|
||||
unsigned getOffsetForRegister(unsigned Reg) const {
|
||||
if (RegInfo.count(Reg))
|
||||
return RegInfo.lookup(Reg).Offset;
|
||||
else
|
||||
return 0;
|
||||
}
|
||||
|
||||
/// getFrameSymbol - Returns the symbol name for the given FrameIndex.
|
||||
const char* getFrameSymbol(int FrameIndex) {
|
||||
if (FrameSymbols.count(FrameIndex)) {
|
||||
return FrameSymbols.lookup(FrameIndex).c_str();
|
||||
} else {
|
||||
std::string Name = "__local";
|
||||
Name += utostr(FrameIndex);
|
||||
// The whole point of caching this name is to ensure the pointer we pass
|
||||
// to any getExternalSymbol() calls will remain valid for the lifetime of
|
||||
// the back-end instance. This is to work around an issue in SelectionDAG
|
||||
// where symbol names are expected to be life-long strings.
|
||||
FrameSymbols[FrameIndex] = Name;
|
||||
return FrameSymbols[FrameIndex].c_str();
|
||||
}
|
||||
}
|
||||
}; // class PTXMachineFunctionInfo
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_MACHINE_FUNCTION_INFO_H
|
@ -1,73 +0,0 @@
|
||||
//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the PTXParamManager class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXParamManager.h"
|
||||
#include "PTX.h"
|
||||
#include "llvm/ADT/StringExtras.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
PTXParamManager::PTXParamManager() {
|
||||
}
|
||||
|
||||
unsigned PTXParamManager::addArgumentParam(unsigned Size) {
|
||||
PTXParam Param;
|
||||
Param.Type = PTX_PARAM_TYPE_ARGUMENT;
|
||||
Param.Size = Size;
|
||||
|
||||
std::string Name;
|
||||
Name = "__param_";
|
||||
Name += utostr(ArgumentParams.size()+1);
|
||||
Param.Name = Name;
|
||||
|
||||
unsigned Index = AllParams.size();
|
||||
AllParams[Index] = Param;
|
||||
ArgumentParams.push_back(Index);
|
||||
|
||||
return Index;
|
||||
}
|
||||
|
||||
unsigned PTXParamManager::addReturnParam(unsigned Size) {
|
||||
PTXParam Param;
|
||||
Param.Type = PTX_PARAM_TYPE_RETURN;
|
||||
Param.Size = Size;
|
||||
|
||||
std::string Name;
|
||||
Name = "__ret_";
|
||||
Name += utostr(ReturnParams.size()+1);
|
||||
Param.Name = Name;
|
||||
|
||||
unsigned Index = AllParams.size();
|
||||
AllParams[Index] = Param;
|
||||
ReturnParams.push_back(Index);
|
||||
|
||||
return Index;
|
||||
}
|
||||
|
||||
unsigned PTXParamManager::addLocalParam(unsigned Size) {
|
||||
PTXParam Param;
|
||||
Param.Type = PTX_PARAM_TYPE_LOCAL;
|
||||
Param.Size = Size;
|
||||
|
||||
std::string Name;
|
||||
Name = "__localparam_";
|
||||
Name += utostr(LocalParams.size()+1);
|
||||
Param.Name = Name;
|
||||
|
||||
unsigned Index = AllParams.size();
|
||||
AllParams[Index] = Param;
|
||||
LocalParams.push_back(Index);
|
||||
|
||||
return Index;
|
||||
}
|
||||
|
@ -1,87 +0,0 @@
|
||||
//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the PTXParamManager class, which manages all defined .param
|
||||
// variables for a particular function.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_PARAM_MANAGER_H
|
||||
#define PTX_PARAM_MANAGER_H
|
||||
|
||||
#include "llvm/ADT/DenseMap.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include <string>
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// PTXParamManager - This class manages all .param variables defined for a
|
||||
/// particular function.
|
||||
class PTXParamManager {
|
||||
private:
|
||||
|
||||
/// PTXParamType - Type of a .param variable
|
||||
enum PTXParamType {
|
||||
PTX_PARAM_TYPE_ARGUMENT,
|
||||
PTX_PARAM_TYPE_RETURN,
|
||||
PTX_PARAM_TYPE_LOCAL
|
||||
};
|
||||
|
||||
/// PTXParam - Definition of a PTX .param variable
|
||||
struct PTXParam {
|
||||
PTXParamType Type;
|
||||
unsigned Size;
|
||||
std::string Name;
|
||||
};
|
||||
|
||||
DenseMap<unsigned, PTXParam> AllParams;
|
||||
SmallVector<unsigned, 4> ArgumentParams;
|
||||
SmallVector<unsigned, 4> ReturnParams;
|
||||
SmallVector<unsigned, 4> LocalParams;
|
||||
|
||||
public:
|
||||
|
||||
typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
|
||||
|
||||
PTXParamManager();
|
||||
|
||||
param_iterator arg_begin() const { return ArgumentParams.begin(); }
|
||||
param_iterator arg_end() const { return ArgumentParams.end(); }
|
||||
param_iterator ret_begin() const { return ReturnParams.begin(); }
|
||||
param_iterator ret_end() const { return ReturnParams.end(); }
|
||||
param_iterator local_begin() const { return LocalParams.begin(); }
|
||||
param_iterator local_end() const { return LocalParams.end(); }
|
||||
|
||||
/// addArgumentParam - Returns a new .param used as an argument.
|
||||
unsigned addArgumentParam(unsigned Size);
|
||||
|
||||
/// addReturnParam - Returns a new .param used as a return argument.
|
||||
unsigned addReturnParam(unsigned Size);
|
||||
|
||||
/// addLocalParam - Returns a new .param used as a local .param variable.
|
||||
unsigned addLocalParam(unsigned Size);
|
||||
|
||||
/// getParamName - Returns the name of the parameter as a string.
|
||||
const std::string &getParamName(unsigned Param) const {
|
||||
assert(AllParams.count(Param) == 1 && "Param has not been defined!");
|
||||
return AllParams.find(Param)->second.Name;
|
||||
}
|
||||
|
||||
/// getParamSize - Returns the size of the parameter in bits.
|
||||
unsigned getParamSize(unsigned Param) const {
|
||||
assert(AllParams.count(Param) == 1 && "Param has not been defined!");
|
||||
return AllParams.find(Param)->second.Size;
|
||||
}
|
||||
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,53 +0,0 @@
|
||||
//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains a register allocator for PTX code.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-reg-alloc"
|
||||
|
||||
#include "PTX.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/CodeGen/RegAllocRegistry.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
// Special register allocator for PTX.
|
||||
class PTXRegAlloc : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
PTXRegAlloc() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual const char* getPassName() const {
|
||||
return "PTX Register Allocator";
|
||||
}
|
||||
|
||||
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
|
||||
AU.setPreservesCFG();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF) {
|
||||
// We do not actually do anything (at least not yet).
|
||||
return false;
|
||||
}
|
||||
};
|
||||
|
||||
char PTXRegAlloc::ID = 0;
|
||||
|
||||
static RegisterRegAlloc
|
||||
ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
|
||||
}
|
||||
|
||||
// Factory for the PTX register allocator pass; the caller receives a freshly
// heap-allocated PTXRegAlloc.
FunctionPass *llvm::createPTXRegisterAllocator() {
  PTXRegAlloc *Allocator = new PTXRegAlloc();
  return Allocator;
}
|
||||
|
@ -1,38 +0,0 @@
|
||||
//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the PTX implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXRegisterInfo.h"
|
||||
#include "PTX.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "PTXGenRegisterInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// Builds the register info for PTX. The TM argument is not used by the body;
// only the instruction info reference is stored.
PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
                                 const TargetInstrInfo &tii)
  : PTXGenRegisterInfo(0), // PTX does not have a return address register.
    TII(tii) {}
|
||||
|
||||
// Reaching this hook is treated as an internal error: the PTX back-end expects
// frame indices to have been eliminated before this point.
void
PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
                                     int /*SPAdj*/,
                                     RegScavenger * /*RS*/) const {
  llvm_unreachable("FrameIndex should have been previously eliminated!");
}
|
@ -1,56 +0,0 @@
|
||||
//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file contains the PTX implementation of the TargetRegisterInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_REGISTER_INFO_H
|
||||
#define PTX_REGISTER_INFO_H
|
||||
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/ADT/BitVector.h"
|
||||
|
||||
#define GET_REGINFO_HEADER
|
||||
#include "PTXGenRegisterInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
class PTXTargetMachine;
|
||||
class MachineFunction;
|
||||
|
||||
struct PTXRegisterInfo : public PTXGenRegisterInfo {
|
||||
private:
|
||||
const TargetInstrInfo &TII;
|
||||
|
||||
public:
|
||||
PTXRegisterInfo(PTXTargetMachine &TM,
|
||||
const TargetInstrInfo &tii);
|
||||
|
||||
virtual const uint16_t
|
||||
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
|
||||
static const uint16_t CalleeSavedRegs[] = { 0 };
|
||||
return CalleeSavedRegs; // save nothing
|
||||
}
|
||||
|
||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const {
|
||||
BitVector Reserved(getNumRegs());
|
||||
return Reserved; // reserve no regs
|
||||
}
|
||||
|
||||
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj,
|
||||
RegScavenger *RS = NULL) const;
|
||||
|
||||
virtual unsigned getFrameRegister(const MachineFunction &MF) const {
|
||||
llvm_unreachable("PTX does not have a frame register");
|
||||
}
|
||||
}; // struct PTXRegisterInfo
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_REGISTER_INFO_H
|
@ -1,36 +0,0 @@
|
||||
//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Declarations that describe the PTX register file
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// PTXReg - Base class for PTX register definitions. It forwards the register
// name to Register and places the register in the "PTX" namespace.
class PTXReg<string n> : Register<n> {
|
||||
let Namespace = "PTX";
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// The generated register info code emits warnings for empty register classes
|
||||
// (e.g. zero-length arrays), so we define a single dummy register here purely
|
||||
// to prevent those warnings. It is the only member of every class below.
|
||||
def DUMMY_REG : PTXReg<"R0">;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
// One register class per value type handled by the back-end (i1 predicate,
// i16/i32/i64 integers, f32/f64 floats). Each class contains only DUMMY_REG.
// The third operand (8/16/32/64) is the RegisterClass alignment argument.
def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
|
||||
def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
|
||||
def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
|
||||
def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
|
||||
def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
|
||||
def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;
|
||||
|
@ -1,150 +0,0 @@
|
||||
//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the PTXSelectionDAGInfo class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "ptx-selectiondag-info"
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
using namespace llvm;
|
||||
|
||||
// Caches a pointer to the PTX subtarget of the given target machine so that
// lowering code can query it.
PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
  : TargetSelectionDAGInfo(TM), Subtarget(&TM.getSubtarget<PTXSubtarget>()) {}

// Nothing to release: the destructor body is empty.
PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {}
|
||||
|
||||
/// EmitTargetCodeForMemcpy - Expand a memcpy with a constant size and 4-byte
/// alignment into explicit loads and stores.
///
/// Returns an empty SDValue when the copy cannot be expanded here (alignment
/// is not a multiple of 4, or the size is not a constant); otherwise returns
/// the chain that depends on all emitted stores. AlwaysInline is ignored:
/// PTX has no C library that provides a memcpy to call instead.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
                                             bool isVolatile, bool AlwaysInline,
                                             MachinePointerInfo DstPtrInfo,
                                             MachinePointerInfo SrcPtrInfo) const {
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();
  // This requires the copy size to be a constant.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();

  unsigned BytesLeft = SizeVal & 3;
  // Keep the word counts 64 bits wide: SizeVal is 64-bit, and narrowing the
  // count to 'unsigned' would silently copy fewer bytes than requested for
  // very large constant sizes.
  uint64_t NumMemOps = SizeVal >> 2;
  uint64_t EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  // Batch size for the load/store groups below. The name is inherited from
  // the ARM implementation this code was copied from.
  const unsigned MAX_LOADS_IN_LDM = 6;
  SDValue TFOps[MAX_LOADS_IN_LDM];
  SDValue Loads[MAX_LOADS_IN_LDM];
  uint64_t SrcOff = 0, DstOff = 0;
  EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
  // same number of stores, and repeat until all whole words are copied.
  while (EmittedNumMemOps < NumMemOps) {
    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      Loads[i] = DAG.getLoad(VT, dl, Chain,
                             DAG.getNode(ISD::ADD, dl, PointerType, Src,
                                         DAG.getConstant(SrcOff, PointerType)),
                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                             false, false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              DAG.getNode(ISD::ADD, dl, PointerType, Dst,
                                          DAG.getConstant(DstOff, PointerType)),
                              DstPtrInfo.getWithOffset(DstOff),
                              isVolatile, false, 0);
      DstOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    EmittedNumMemOps += i;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes: a 2-byte access
  // while at least two bytes remain, then a single byte. The volatile flag
  // is forwarded here exactly as in the word loop above, so that a volatile
  // memcpy does not end with non-volatile accesses.
  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, PointerType, Src,
                                       DAG.getConstant(SrcOff, PointerType)),
                           SrcPtrInfo.getWithOffset(SrcOff), isVolatile, false,
                           false, 0);
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

  // Replay the same size sequence for the stores.
  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, PointerType, Dst,
                                        DAG.getConstant(DstOff, PointerType)),
                            DstPtrInfo.getWithOffset(DstOff),
                            isVolatile, false, 0);
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
}
|
||||
|
||||
// memset lowering is not implemented for PTX; reaching this hook is treated
// as an internal error.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain, SDValue Dst,
                                             SDValue Src, SDValue Size,
                                             unsigned Align, bool isVolatile,
                                             MachinePointerInfo DstPtrInfo)
                                             const {
  llvm_unreachable("memset lowering not implemented for PTX yet");
}
|
||||
|
@ -1,53 +0,0 @@
|
||||
//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the PTX subclass for TargetSelectionDAGInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTXSELECTIONDAGINFO_H
|
||||
#define PTXSELECTIONDAGINFO_H
|
||||
|
||||
#include "llvm/Target/TargetSelectionDAGInfo.h"
|
||||
|
||||
namespace llvm {
|
||||
|
||||
/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
|
||||
/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
|
||||
class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
|
||||
/// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
|
||||
/// make the right decision when generating code for different targets.
|
||||
const PTXSubtarget *Subtarget;
|
||||
|
||||
public:
|
||||
explicit PTXSelectionDAGInfo(const TargetMachine &TM);
|
||||
~PTXSelectionDAGInfo();
|
||||
|
||||
virtual
|
||||
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
|
||||
SDValue Chain,
|
||||
SDValue Dst, SDValue Src,
|
||||
SDValue Size, unsigned Align,
|
||||
bool isVolatile, bool AlwaysInline,
|
||||
MachinePointerInfo DstPtrInfo,
|
||||
MachinePointerInfo SrcPtrInfo) const;
|
||||
|
||||
virtual
|
||||
SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
|
||||
SDValue Chain,
|
||||
SDValue Op1, SDValue Op2,
|
||||
SDValue Op3, unsigned Align,
|
||||
bool isVolatile,
|
||||
MachinePointerInfo DstPtrInfo) const;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
@ -1,68 +0,0 @@
|
||||
//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file implements the PTX specific subclass of TargetSubtargetInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXSubtarget.h"
|
||||
#include "PTX.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
#define GET_SUBTARGETINFO_TARGET_DESC
|
||||
#define GET_SUBTARGETINFO_CTOR
|
||||
#include "PTXGenSubtargetInfo.inc"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
void PTXSubtarget::anchor() { }
|
||||
|
||||
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS, bool is64Bit)
|
||||
: PTXGenSubtargetInfo(TT, CPU, FS),
|
||||
PTXTarget(PTX_COMPUTE_1_0),
|
||||
PTXVersion(PTX_VERSION_2_0),
|
||||
SupportsDouble(false),
|
||||
SupportsFMA(true),
|
||||
Is64Bit(is64Bit) {
|
||||
std::string TARGET = CPU;
|
||||
if (TARGET.empty())
|
||||
TARGET = "generic";
|
||||
ParseSubtargetFeatures(TARGET, FS);
|
||||
}
|
||||
|
||||
std::string PTXSubtarget::getTargetString() const {
|
||||
switch(PTXTarget) {
|
||||
default: llvm_unreachable("Unknown PTX target");
|
||||
case PTX_SM_1_0: return "sm_10";
|
||||
case PTX_SM_1_1: return "sm_11";
|
||||
case PTX_SM_1_2: return "sm_12";
|
||||
case PTX_SM_1_3: return "sm_13";
|
||||
case PTX_SM_2_0: return "sm_20";
|
||||
case PTX_SM_2_1: return "sm_21";
|
||||
case PTX_SM_2_2: return "sm_22";
|
||||
case PTX_SM_2_3: return "sm_23";
|
||||
case PTX_COMPUTE_1_0: return "compute_10";
|
||||
case PTX_COMPUTE_1_1: return "compute_11";
|
||||
case PTX_COMPUTE_1_2: return "compute_12";
|
||||
case PTX_COMPUTE_1_3: return "compute_13";
|
||||
case PTX_COMPUTE_2_0: return "compute_20";
|
||||
}
|
||||
}
|
||||
|
||||
std::string PTXSubtarget::getPTXVersionString() const {
|
||||
switch(PTXVersion) {
|
||||
case PTX_VERSION_2_0: return "2.0";
|
||||
case PTX_VERSION_2_1: return "2.1";
|
||||
case PTX_VERSION_2_2: return "2.2";
|
||||
case PTX_VERSION_2_3: return "2.3";
|
||||
}
|
||||
llvm_unreachable("Invalid PTX version");
|
||||
}
|
@ -1,131 +0,0 @@
|
||||
//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the PTX specific subclass of TargetSubtargetInfo.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_SUBTARGET_H
|
||||
#define PTX_SUBTARGET_H
|
||||
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
|
||||
#define GET_SUBTARGETINFO_HEADER
|
||||
#include "PTXGenSubtargetInfo.inc"
|
||||
|
||||
namespace llvm {
|
||||
class StringRef;
|
||||
|
||||
class PTXSubtarget : public PTXGenSubtargetInfo {
|
||||
virtual void anchor();
|
||||
public:
|
||||
|
||||
/**
|
||||
* Enumeration of Shader Models supported by the back-end.
|
||||
*/
|
||||
enum PTXTargetEnum {
|
||||
PTX_COMPUTE_1_0, /*< Compute Compatibility 1.0 */
|
||||
PTX_COMPUTE_1_1, /*< Compute Compatibility 1.1 */
|
||||
PTX_COMPUTE_1_2, /*< Compute Compatibility 1.2 */
|
||||
PTX_COMPUTE_1_3, /*< Compute Compatibility 1.3 */
|
||||
PTX_COMPUTE_2_0, /*< Compute Compatibility 2.0 */
|
||||
PTX_LAST_COMPUTE,
|
||||
|
||||
PTX_SM_1_0, /*< Shader Model 1.0 */
|
||||
PTX_SM_1_1, /*< Shader Model 1.1 */
|
||||
PTX_SM_1_2, /*< Shader Model 1.2 */
|
||||
PTX_SM_1_3, /*< Shader Model 1.3 */
|
||||
PTX_SM_2_0, /*< Shader Model 2.0 */
|
||||
PTX_SM_2_1, /*< Shader Model 2.1 */
|
||||
PTX_SM_2_2, /*< Shader Model 2.2 */
|
||||
PTX_SM_2_3, /*< Shader Model 2.3 */
|
||||
PTX_LAST_SM
|
||||
};
|
||||
|
||||
/**
|
||||
* Enumeration of PTX versions supported by the back-end.
|
||||
*
|
||||
* Currently, PTX 2.0 is the minimum supported version.
|
||||
*/
|
||||
enum PTXVersionEnum {
|
||||
PTX_VERSION_2_0, /*< PTX Version 2.0 */
|
||||
PTX_VERSION_2_1, /*< PTX Version 2.1 */
|
||||
PTX_VERSION_2_2, /*< PTX Version 2.2 */
|
||||
PTX_VERSION_2_3 /*< PTX Version 2.3 */
|
||||
};
|
||||
|
||||
private:
|
||||
|
||||
/// Shader Model supported on the target GPU.
|
||||
PTXTargetEnum PTXTarget;
|
||||
|
||||
/// PTX Language Version.
|
||||
PTXVersionEnum PTXVersion;
|
||||
|
||||
// The native .f64 type is supported on the hardware.
|
||||
bool SupportsDouble;
|
||||
|
||||
// Support the fused-multiply add (FMA) and multiply-add (MAD)
|
||||
// instructions
|
||||
bool SupportsFMA;
|
||||
|
||||
// Use .u64 instead of .u32 for addresses.
|
||||
bool Is64Bit;
|
||||
|
||||
public:
|
||||
|
||||
PTXSubtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS, bool is64Bit);
|
||||
|
||||
// Target architecture accessors
|
||||
std::string getTargetString() const;
|
||||
|
||||
std::string getPTXVersionString() const;
|
||||
|
||||
bool supportsDouble() const { return SupportsDouble; }
|
||||
|
||||
bool is64Bit() const { return Is64Bit; }
|
||||
|
||||
bool supportsFMA() const { return SupportsFMA; }
|
||||
|
||||
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
|
||||
|
||||
bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
|
||||
|
||||
bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
|
||||
|
||||
bool fdivNeedsRoundingMode() const {
|
||||
return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool fmadNeedsRoundingMode() const {
|
||||
return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool useParamSpaceForDeviceArgs() const {
|
||||
return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool callsAreHandled() const {
|
||||
return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
|
||||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
|
||||
}
|
||||
|
||||
bool emitPtrAttribute() const {
|
||||
return PTXVersion >= PTX_VERSION_2_2;
|
||||
}
|
||||
|
||||
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
|
||||
}; // class PTXSubtarget
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_SUBTARGET_H
|
@ -1,165 +0,0 @@
|
||||
//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Top-level implementation for the PTX target.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "PTX.h"
|
||||
#include "llvm/PassManager.h"
|
||||
#include "llvm/Analysis/Passes.h"
|
||||
#include "llvm/Analysis/Verifier.h"
|
||||
#include "llvm/Assembly/PrintModulePass.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineModuleInfo.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/MC/MCAsmInfo.h"
|
||||
#include "llvm/MC/MCInstrInfo.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSubtargetInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Target/TargetLoweringObjectFile.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
#include "llvm/Target/TargetOptions.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
|
||||
bool isVerboseAsm, bool useLoc,
|
||||
bool useCFI, bool useDwarfDirectory,
|
||||
MCInstPrinter *InstPrint,
|
||||
MCCodeEmitter *CE,
|
||||
MCAsmBackend *MAB,
|
||||
bool ShowInst);
|
||||
}
|
||||
|
||||
extern "C" void LLVMInitializePTXTarget() {
|
||||
|
||||
RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
|
||||
RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
|
||||
|
||||
TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
|
||||
TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
|
||||
}
|
||||
|
||||
namespace {
  // Target data layout strings for the 32-bit and 64-bit targets. The two
  // differ only in the pointer specification (p:32:32 vs. p:64:64). Both the
  // characters and the pointers are const, so these file-local globals cannot
  // be re-pointed by accident.
  const char *const DataLayout32 =
    "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
  const char *const DataLayout64 =
    "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
}
|
||||
|
||||
// DataLayout and FrameLowering are filled with dummy data
|
||||
PTXTargetMachine::PTXTargetMachine(const Target &T,
|
||||
StringRef TT, StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL,
|
||||
bool is64Bit)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
|
||||
Subtarget(TT, CPU, FS, is64Bit),
|
||||
FrameLowering(Subtarget),
|
||||
InstrInfo(*this),
|
||||
TSInfo(*this),
|
||||
TLInfo(*this) {
|
||||
}
|
||||
|
||||
void PTX32TargetMachine::anchor() { }
|
||||
|
||||
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
|
||||
}
|
||||
|
||||
void PTX64TargetMachine::anchor() { }
|
||||
|
||||
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
/// PTX Code Generator Pass Configuration Options.
|
||||
class PTXPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
|
||||
PTXTargetMachine &getPTXTargetMachine() const {
|
||||
return getTM<PTXTargetMachine>();
|
||||
}
|
||||
|
||||
bool addInstSelector();
|
||||
FunctionPass *createTargetRegisterAllocator(bool);
|
||||
void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
|
||||
bool addPostRegAlloc();
|
||||
void addMachineLateOptimization();
|
||||
bool addPreEmitPass();
|
||||
};
|
||||
} // namespace
|
||||
|
||||
// Creates the PTX pass configuration with the prolog/epilog inserter pass
// disabled.
TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
  PTXPassConfig *Config = new PTXPassConfig(this, PM);
  Config->disablePass(PrologEpilogCodeInserterID);
  return Config;
}
|
||||
|
||||
// Adds the PTX instruction selector for the current optimization level.
// Always returns false.
bool PTXPassConfig::addInstSelector() {
  PM->add(createPTXISelDag(getPTXTargetMachine(),
                           getOptLevel()));
  return false;
}
|
||||
|
||||
// The PTX register allocator is used regardless of whether an optimized
// allocator was requested.
FunctionPass *
PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
  return createPTXRegisterAllocator();
}
|
||||
|
||||
// Modify the optimized compilation path to bypass optimized register alloction.
|
||||
void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
|
||||
addFastRegAlloc(RegAllocPass);
|
||||
}
|
||||
|
||||
// No passes are added here. PTXMFInfoExtract must run after register
// allocation; it is added in addPreEmitPass().
bool PTXPassConfig::addPostRegAlloc() { return false; }
|
||||
|
||||
/// Add passes that optimize machine instructions after register allocation.
|
||||
void PTXPassConfig::addMachineLateOptimization() {
|
||||
if (addPass(BranchFolderPassID) != &NoPassID)
|
||||
printAndVerify("After BranchFolding");
|
||||
|
||||
if (addPass(TailDuplicateID) != &NoPassID)
|
||||
printAndVerify("After TailDuplicate");
|
||||
}
|
||||
|
||||
// Adds the machine-function info extraction pass and then the FP rounding
// mode pass, in that order. Always returns true.
bool PTXPassConfig::addPreEmitPass() {
  PM->add(createPTXMFInfoExtract(getPTXTargetMachine(),
                                 getOptLevel()));
  PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(),
                                      getOptLevel()));
  return true;
}
|
@ -1,104 +0,0 @@
|
||||
//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file declares the PTX specific subclass of TargetMachine.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef PTX_TARGET_MACHINE_H
|
||||
#define PTX_TARGET_MACHINE_H
|
||||
|
||||
#include "PTXISelLowering.h"
|
||||
#include "PTXInstrInfo.h"
|
||||
#include "PTXFrameLowering.h"
|
||||
#include "PTXSelectionDAGInfo.h"
|
||||
#include "PTXSubtarget.h"
|
||||
#include "llvm/Target/TargetData.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
#include "llvm/Target/TargetMachine.h"
|
||||
|
||||
namespace llvm {
|
||||
class PTXTargetMachine : public LLVMTargetMachine {
|
||||
private:
|
||||
const TargetData DataLayout;
|
||||
PTXSubtarget Subtarget; // has to be initialized before FrameLowering
|
||||
PTXFrameLowering FrameLowering;
|
||||
PTXInstrInfo InstrInfo;
|
||||
PTXSelectionDAGInfo TSInfo;
|
||||
PTXTargetLowering TLInfo;
|
||||
|
||||
public:
|
||||
PTXTargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS, const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL,
|
||||
bool is64Bit);
|
||||
|
||||
virtual const TargetData *getTargetData() const { return &DataLayout; }
|
||||
|
||||
virtual const TargetFrameLowering *getFrameLowering() const {
|
||||
return &FrameLowering;
|
||||
}
|
||||
|
||||
virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
|
||||
virtual const TargetRegisterInfo *getRegisterInfo() const {
|
||||
return &InstrInfo.getRegisterInfo(); }
|
||||
|
||||
virtual const PTXTargetLowering *getTargetLowering() const {
|
||||
return &TLInfo; }
|
||||
|
||||
virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
|
||||
return &TSInfo;
|
||||
}
|
||||
|
||||
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
|
||||
|
||||
// Emission of machine code through JITCodeEmitter is not supported.
|
||||
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
|
||||
JITCodeEmitter &,
|
||||
bool = true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Emission of machine code through MCJIT is not supported.
|
||||
virtual bool addPassesToEmitMC(PassManagerBase &,
|
||||
MCContext *&,
|
||||
raw_ostream &,
|
||||
bool = true) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Pass Pipeline Configuration
|
||||
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
|
||||
}; // class PTXTargetMachine
|
||||
|
||||
|
||||
class PTX32TargetMachine : public PTXTargetMachine {
|
||||
virtual void anchor();
|
||||
public:
|
||||
|
||||
PTX32TargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS, const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL);
|
||||
}; // class PTX32TargetMachine
|
||||
|
||||
class PTX64TargetMachine : public PTXTargetMachine {
|
||||
virtual void anchor();
|
||||
public:
|
||||
|
||||
PTX64TargetMachine(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS, const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL);
|
||||
}; // class PTX64TargetMachine
|
||||
|
||||
} // namespace llvm
|
||||
|
||||
#endif // PTX_TARGET_MACHINE_H
|
@ -1,7 +0,0 @@
|
||||
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
|
||||
|
||||
add_llvm_library(LLVMPTXInfo
|
||||
PTXTargetInfo.cpp
|
||||
)
|
||||
|
||||
add_dependencies(LLVMPTXInfo PTXCommonTableGen)
|
@ -1,23 +0,0 @@
|
||||
;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
|
||||
;
|
||||
; The LLVM Compiler Infrastructure
|
||||
;
|
||||
; This file is distributed under the University of Illinois Open Source
|
||||
; License. See LICENSE.TXT for details.
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
;
|
||||
; This is an LLVMBuild description file for the components in this subdirectory.
|
||||
;
|
||||
; For more information on the LLVMBuild system, please see:
|
||||
;
|
||||
; http://llvm.org/docs/LLVMBuild.html
|
||||
;
|
||||
;===------------------------------------------------------------------------===;
|
||||
|
||||
[component_0]
|
||||
type = Library
|
||||
name = PTXInfo
|
||||
parent = PTX
|
||||
required_libraries = MC Support Target
|
||||
add_to_library_groups = PTX
|
@ -1,15 +0,0 @@
|
||||
##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
|
||||
#
|
||||
# The LLVM Compiler Infrastructure
|
||||
#
|
||||
# This file is distributed under the University of Illinois Open Source
|
||||
# License. See LICENSE.TXT for details.
|
||||
#
|
||||
##===----------------------------------------------------------------------===##
|
||||
LEVEL = ../../../..
|
||||
LIBRARYNAME = LLVMPTXInfo
|
||||
|
||||
# Hack: we need to include 'main' target directory to grab private headers
|
||||
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
|
||||
|
||||
include $(LEVEL)/Makefile.common
|
@ -1,25 +0,0 @@
|
||||
//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTX.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/Support/TargetRegistry.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
Target llvm::ThePTX32Target;
|
||||
Target llvm::ThePTX64Target;
|
||||
|
||||
extern "C" void LLVMInitializePTXTargetInfo() {
|
||||
// see llvm/ADT/Triple.h
|
||||
RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
|
||||
"PTX (32-bit) [Experimental]");
|
||||
RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
|
||||
"PTX (64-bit) [Experimental]");
|
||||
}
|
@ -309,7 +309,6 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
|
||||
msp430-*) llvm_cv_target_arch="MSP430" ;;
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac])
|
||||
@ -457,7 +456,6 @@ else
|
||||
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
|
||||
esac
|
||||
@ -569,13 +567,13 @@ TARGETS_TO_BUILD=""
|
||||
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
|
||||
[Build specific host targets: all or target1,target2,... Valid targets are:
|
||||
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, nvptx, cbe, and cpp (default=all)]),,
|
||||
xcore, msp430, nvptx, cbe, and cpp (default=all)]),,
|
||||
enableval=all)
|
||||
if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -590,7 +588,6 @@ case "$enableval" in
|
||||
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -604,7 +601,6 @@ case "$enableval" in
|
||||
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) AC_MSG_ERROR([Can not set target to build]) ;;
|
||||
esac ;;
|
||||
|
12
projects/sample/configure
vendored
12
projects/sample/configure
vendored
@ -1402,8 +1402,7 @@ Optional Features:
|
||||
--enable-targets Build specific host targets: all or
|
||||
target1,target2,... Valid targets are: host, x86,
|
||||
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
|
||||
xcore, msp430, ptx, nvptx, cbe, and cpp
|
||||
(default=all)
|
||||
xcore, msp430, nvptx, cbe, and cpp (default=all)
|
||||
--enable-bindings Build specific language bindings:
|
||||
all,auto,none,{binding-name} (default=auto)
|
||||
--enable-libffi Check for the presence of libffi (default is NO)
|
||||
@ -3846,7 +3845,6 @@ else
|
||||
msp430-*) llvm_cv_target_arch="MSP430" ;;
|
||||
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
|
||||
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
|
||||
ptx-*) llvm_cv_target_arch="PTX" ;;
|
||||
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
|
||||
*) llvm_cv_target_arch="Unknown" ;;
|
||||
esac
|
||||
@ -5069,8 +5067,6 @@ else
|
||||
Hexagon) TARGET_HAS_JIT=0
|
||||
;;
|
||||
MBlaze) TARGET_HAS_JIT=0
|
||||
;;
|
||||
PTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
NVPTX) TARGET_HAS_JIT=0
|
||||
;;
|
||||
@ -5258,7 +5254,7 @@ if test "$enableval" = host-only ; then
|
||||
enableval=host
|
||||
fi
|
||||
case "$enableval" in
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
|
||||
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
|
||||
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
|
||||
case "$a_target" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5273,7 +5269,6 @@ case "$enableval" in
|
||||
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
|
||||
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
|
||||
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
host) case "$llvm_cv_target_arch" in
|
||||
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
|
||||
@ -5287,7 +5282,6 @@ case "$enableval" in
|
||||
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
|
||||
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
|
||||
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
|
||||
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
|
||||
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
|
||||
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
|
||||
echo "$as_me: error: Can not set target to build" >&2;}
|
||||
@ -10313,7 +10307,7 @@ else
|
||||
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
|
||||
lt_status=$lt_dlunknown
|
||||
cat > conftest.$ac_ext <<EOF
|
||||
#line 10316 "configure"
|
||||
#line 10310 "configure"
|
||||
#include "confdefs.h"
|
||||
|
||||
#if HAVE_DLFCN_H
|
||||
|
@ -1,24 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
@A = common global [1536 x [1536 x float]] zeroinitializer, align 4
|
||||
@B = common global [1536 x [1536 x float]] zeroinitializer, align 4
|
||||
|
||||
define internal ptx_device void @init_array(i32 %x, i32 %y) {
|
||||
%arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y
|
||||
%arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y
|
||||
%mul5 = mul i32 %x, %y
|
||||
%rem = srem i32 %mul5, 1024
|
||||
%add = add nsw i32 %rem, 1
|
||||
; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}}
|
||||
%conv = sitofp i32 %add to double
|
||||
%div = fmul double %conv, 5.000000e-01
|
||||
%conv7 = fptrunc double %div to float
|
||||
store float %conv7, float* %arrayidx103, align 4
|
||||
%rem14 = srem i32 %mul5, 1024
|
||||
%add15 = add nsw i32 %rem14, 1
|
||||
%conv16 = sitofp i32 %add15 to double
|
||||
%div17 = fmul double %conv16, 5.000000e-01
|
||||
%conv18 = fptrunc double %div17 to float
|
||||
store float %conv18, float* %arrayidx224, align 4
|
||||
ret void
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||
; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%z = add i16 %x, %y
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||
; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%z = add i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%z = add i64 %x, %y
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
|
||||
; CHECK: ret;
|
||||
%z = fadd double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1;
|
||||
; CHECK: ret;
|
||||
%z = add i16 %x, 1
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1;
|
||||
; CHECK: ret;
|
||||
%z = add i32 %x, 1
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1;
|
||||
; CHECK: ret;
|
||||
%z = add i64 %x, 1
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000;
|
||||
; CHECK: ret;
|
||||
%z = fadd float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000;
|
||||
; CHECK: ret;
|
||||
%z = fadd double %x, 1.0
|
||||
ret double %z
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
||||
; XFAIL: *
|
||||
|
||||
%complex = type { float, float }
|
||||
|
||||
define ptx_device %complex @complex_add(%complex %a, %complex %b) {
|
||||
entry:
|
||||
; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1];
|
||||
; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3];
|
||||
; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2];
|
||||
; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4];
|
||||
; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]];
|
||||
; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]];
|
||||
; CHECK-NEXT: ret;
|
||||
%a.real = extractvalue %complex %a, 0
|
||||
%a.imag = extractvalue %complex %a, 1
|
||||
%b.real = extractvalue %complex %b, 0
|
||||
%b.imag = extractvalue %complex %b, 1
|
||||
%ret.real = fadd float %a.real, %b.real
|
||||
%ret.imag = fadd float %a.imag, %b.imag
|
||||
%ret.0 = insertvalue %complex undef, float %ret.real, 0
|
||||
%ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
|
||||
ret %complex %ret.1
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; preds
|
||||
|
||||
define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
|
||||
; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
|
||||
%c = and i1 %x, %y
|
||||
%d = zext i1 %c to i32
|
||||
ret i32 %d
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
|
||||
; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
|
||||
%a = or i1 %x, %y
|
||||
%b = zext i1 %a to i32
|
||||
ret i32 %b
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
|
||||
; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
|
||||
%a = xor i1 %x, %y
|
||||
%b = zext i1 %a to i32
|
||||
ret i32 %b
|
||||
}
|
@ -1,24 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device void @test_bra_direct() {
|
||||
; CHECK: bra $L__BB0_1;
|
||||
entry:
|
||||
br label %loop
|
||||
loop:
|
||||
br label %loop
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
|
||||
entry:
|
||||
; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]]
|
||||
%p = icmp ugt i32 %x, %y
|
||||
; CHECK-NEXT: @%p0 bra
|
||||
; CHECK-NOT: bra
|
||||
br i1 %p, label %clause.if, label %clause.else
|
||||
clause.if:
|
||||
; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]]
|
||||
ret i32 %x
|
||||
clause.else:
|
||||
; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]]
|
||||
ret i32 %y
|
||||
}
|
@ -1,290 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; preds
|
||||
; (note: we convert back to i32 to return)
|
||||
|
||||
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
|
||||
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0
|
||||
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
|
||||
; CHECK: ret;
|
||||
%a = trunc i16 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
%c = zext i1 %b to i32
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
|
||||
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
|
||||
; CHECK: ret;
|
||||
%a = trunc i32 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
%c = zext i1 %b to i32
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
|
||||
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
|
||||
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
|
||||
; CHECK: ret;
|
||||
%a = trunc i64 %x to i1
|
||||
%b = and i1 %a, %y
|
||||
%c = zext i1 %b to i32
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
|
||||
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i1
|
||||
%b = and i1 %a, %y
|
||||
%c = zext i1 %b to i32
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
|
||||
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
|
||||
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i1
|
||||
%b = and i1 %a, %y
|
||||
%c = zext i1 %b to i32
|
||||
ret i32 %c
|
||||
}
|
||||
|
||||
; i16
|
||||
|
||||
define ptx_device i16 @cvt_i16_preds(i1 %x) {
|
||||
; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = zext i1 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_i32(i32 %x) {
|
||||
; CHECK: cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = trunc i32 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_i64(i64 %x) {
|
||||
; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = trunc i64 %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
define ptx_device i16 @cvt_i16_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i16
|
||||
ret i16 %a
|
||||
}
|
||||
|
||||
; i32
|
||||
|
||||
define ptx_device i32 @cvt_i32_preds(i1 %x) {
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = zext i1 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_i16(i16 %x) {
|
||||
; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = zext i16 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_i64(i64 %x) {
|
||||
; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = trunc i64 %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i32 @cvt_i32_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i32
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
; i64
|
||||
|
||||
define ptx_device i64 @cvt_i64_preds(i1 %x) {
|
||||
; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = zext i1 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_i16(i16 %x) {
|
||||
; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = zext i16 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_i32(i32 %x) {
|
||||
; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = zext i32 %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_f32(float %x) {
|
||||
; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui float %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @cvt_i64_f64(double %x) {
|
||||
; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptoui double %x to i64
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
; f32
|
||||
|
||||
define ptx_device float @cvt_f32_preds(i1 %x) {
|
||||
; CHECK: mov.b32 %f0, 0;
|
||||
; CHECK: mov.b32 %f1, 1065353216;
|
||||
; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i1 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i16 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i32(i32 %x) {
|
||||
; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i32 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_i64(i64 %x) {
|
||||
; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i64 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_f64(double %x) {
|
||||
; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fptrunc double %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_s16(i16 %x) {
|
||||
; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i16 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_s32(i32 %x) {
|
||||
; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i32 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @cvt_f32_s64(i64 %x) {
|
||||
; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i64 %x to float
|
||||
ret float %a
|
||||
}
|
||||
|
||||
; f64
|
||||
|
||||
define ptx_device double @cvt_f64_preds(i1 %x) {
|
||||
; CHECK: mov.b64 %fd0, 0;
|
||||
; CHECK: mov.b64 %fd1, 4575657221408423936;
|
||||
; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i1 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i16(i16 %x) {
|
||||
; CHECK: cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i16 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i32(i32 %x) {
|
||||
; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i32 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_i64(i64 %x) {
|
||||
; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = uitofp i64 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_f32(float %x) {
|
||||
; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fpext float %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_s16(i16 %x) {
|
||||
; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i16 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_s32(i32 %x) {
|
||||
; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i32 %x to double
|
||||
ret double %a
|
||||
}
|
||||
|
||||
define ptx_device double @cvt_f64_s64(i64 %x) {
|
||||
; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
|
||||
; CHECK: ret
|
||||
%a = sitofp i64 %x to double
|
||||
ret double %a
|
||||
}
|
@ -1,14 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_kernel void @t1() {
|
||||
; CHECK: exit;
|
||||
; CHECK-NOT: ret;
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_kernel void @t2(i32* %p, i32 %x) {
|
||||
store i32 %x, i32* %p
|
||||
; CHECK: exit;
|
||||
; CHECK-NOT: ret;
|
||||
ret void
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fdiv double %x, %y
|
||||
ret double %a
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fdiv float %x, %y
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fdiv double %x, %y
|
||||
ret double %a
|
||||
}
|
@ -1,15 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x) {
|
||||
; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = fsub float -0.000000e+00, %x
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x) {
|
||||
; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = fsub double -0.000000e+00, %x
|
||||
ret double %y
|
||||
}
|
@ -1,281 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_tid_x() {
|
||||
; CHECK: mov.u32 %ret0, %tid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_y() {
|
||||
; CHECK: mov.u32 %ret0, %tid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_z() {
|
||||
; CHECK: mov.u32 %ret0, %tid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_w() {
|
||||
; CHECK: mov.u32 %ret0, %tid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_x() {
|
||||
; CHECK: mov.u32 %ret0, %ntid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_y() {
|
||||
; CHECK: mov.u32 %ret0, %ntid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_z() {
|
||||
; CHECK: mov.u32 %ret0, %ntid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_w() {
|
||||
; CHECK: mov.u32 %ret0, %ntid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_laneid() {
|
||||
; CHECK: mov.u32 %ret0, %laneid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.laneid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_warpid() {
|
||||
; CHECK: mov.u32 %ret0, %warpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.warpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nwarpid() {
|
||||
; CHECK: mov.u32 %ret0, %nwarpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nwarpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_x() {
|
||||
; CHECK: mov.u32 %ret0, %ctaid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_y() {
|
||||
; CHECK: mov.u32 %ret0, %ctaid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_z() {
|
||||
; CHECK: mov.u32 %ret0, %ctaid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_w() {
|
||||
; CHECK: mov.u32 %ret0, %ctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_x() {
|
||||
; CHECK: mov.u32 %ret0, %nctaid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_y() {
|
||||
; CHECK: mov.u32 %ret0, %nctaid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_z() {
|
||||
; CHECK: mov.u32 %ret0, %nctaid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_w() {
|
||||
; CHECK: mov.u32 %ret0, %nctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_smid() {
|
||||
; CHECK: mov.u32 %ret0, %smid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.smid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nsmid() {
|
||||
; CHECK: mov.u32 %ret0, %nsmid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nsmid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_gridid() {
|
||||
; CHECK: mov.u32 %ret0, %gridid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.gridid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_eq() {
|
||||
; CHECK: mov.u32 %ret0, %lanemask_eq;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.eq()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_le() {
|
||||
; CHECK: mov.u32 %ret0, %lanemask_le;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.le()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_lt() {
|
||||
; CHECK: mov.u32 %ret0, %lanemask_lt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.lt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_ge() {
|
||||
; CHECK: mov.u32 %ret0, %lanemask_ge;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.ge()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_gt() {
|
||||
; CHECK: mov.u32 %ret0, %lanemask_gt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.gt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_clock() {
|
||||
; CHECK: mov.u32 %ret0, %clock;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.clock()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @test_clock64() {
|
||||
; CHECK: mov.u64 %ret0, %clock64;
|
||||
; CHECK: ret;
|
||||
%x = call i64 @llvm.ptx.read.clock64()
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm0() {
|
||||
; CHECK: mov.u32 %ret0, %pm0;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm0()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm1() {
|
||||
; CHECK: mov.u32 %ret0, %pm1;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm1()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm2() {
|
||||
; CHECK: mov.u32 %ret0, %pm2;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm2()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm3() {
|
||||
; CHECK: mov.u32 %ret0, %pm3;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm3()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device void @test_bar_sync() {
|
||||
; CHECK: bar.sync 0
|
||||
; CHECK: ret;
|
||||
call void @llvm.ptx.bar.sync(i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare i32 @llvm.ptx.read.tid.x()
|
||||
declare i32 @llvm.ptx.read.tid.y()
|
||||
declare i32 @llvm.ptx.read.tid.z()
|
||||
declare i32 @llvm.ptx.read.tid.w()
|
||||
declare i32 @llvm.ptx.read.ntid.x()
|
||||
declare i32 @llvm.ptx.read.ntid.y()
|
||||
declare i32 @llvm.ptx.read.ntid.z()
|
||||
declare i32 @llvm.ptx.read.ntid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.laneid()
|
||||
declare i32 @llvm.ptx.read.warpid()
|
||||
declare i32 @llvm.ptx.read.nwarpid()
|
||||
|
||||
declare i32 @llvm.ptx.read.ctaid.x()
|
||||
declare i32 @llvm.ptx.read.ctaid.y()
|
||||
declare i32 @llvm.ptx.read.ctaid.z()
|
||||
declare i32 @llvm.ptx.read.ctaid.w()
|
||||
declare i32 @llvm.ptx.read.nctaid.x()
|
||||
declare i32 @llvm.ptx.read.nctaid.y()
|
||||
declare i32 @llvm.ptx.read.nctaid.z()
|
||||
declare i32 @llvm.ptx.read.nctaid.w()
|
||||
|
||||
declare i32 @llvm.ptx.read.smid()
|
||||
declare i32 @llvm.ptx.read.nsmid()
|
||||
declare i32 @llvm.ptx.read.gridid()
|
||||
|
||||
declare i32 @llvm.ptx.read.lanemask.eq()
|
||||
declare i32 @llvm.ptx.read.lanemask.le()
|
||||
declare i32 @llvm.ptx.read.lanemask.lt()
|
||||
declare i32 @llvm.ptx.read.lanemask.ge()
|
||||
declare i32 @llvm.ptx.read.lanemask.gt()
|
||||
|
||||
declare i32 @llvm.ptx.read.clock()
|
||||
declare i64 @llvm.ptx.read.clock64()
|
||||
|
||||
declare i32 @llvm.ptx.read.pm0()
|
||||
declare i32 @llvm.ptx.read.pm1()
|
||||
declare i32 @llvm.ptx.read.pm2()
|
||||
declare i32 @llvm.ptx.read.pm3()
|
||||
|
||||
declare void @llvm.ptx.bar.sync(i32 %i)
|
@ -1,382 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .b16 array_i16[10];
|
||||
@array_i16 = external global [10 x i16]
|
||||
|
||||
;CHECK: .extern .const .b16 array_constant_i16[10];
|
||||
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||
|
||||
;CHECK: .extern .shared .b16 array_shared_i16[10];
|
||||
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||
|
||||
;CHECK: .extern .global .b32 array_i32[10];
|
||||
@array_i32 = external global [10 x i32]
|
||||
|
||||
;CHECK: .extern .const .b32 array_constant_i32[10];
|
||||
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||
|
||||
;CHECK: .extern .shared .b32 array_shared_i32[10];
|
||||
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .global .b64 array_i64[10];
|
||||
@array_i64 = external global [10 x i64]
|
||||
|
||||
;CHECK: .extern .const .b64 array_constant_i64[10];
|
||||
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||
|
||||
;CHECK: .extern .shared .b64 array_shared_i64[10];
|
||||
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||
|
||||
;CHECK: .extern .global .b32 array_float[10];
|
||||
@array_float = external global [10 x float]
|
||||
|
||||
;CHECK: .extern .const .b32 array_constant_float[10];
|
||||
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .b32 array_shared_float[10];
|
||||
@array_shared_float = external addrspace(4) global [10 x float]
|
||||
|
||||
;CHECK: .extern .global .b64 array_double[10];
|
||||
@array_double = external global [10 x double]
|
||||
|
||||
;CHECK: .extern .const .b64 array_constant_double[10];
|
||||
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||
|
||||
;CHECK: .extern .shared .b64 array_shared_double[10];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
|
||||
define ptx_device i16 @t1_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
;CHECK: ret;
|
||||
%x = load i16* %p
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
;CHECK: ret;
|
||||
%x = load i32* %p
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64(i64* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
;CHECK: ret;
|
||||
%x = load i64* %p
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
;CHECK: ret;
|
||||
%x = load float* %p
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
;CHECK: ret;
|
||||
%x = load double* %p
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i16* %p, i32 1
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i32* %p, i32 1
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i64* %p, i32 1
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr double* %p, i32 1
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
%i = getelementptr i16* %p, i32 %q
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
%i = getelementptr i32* %p, i32 %q
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
%i = getelementptr i64* %p, i32 %q
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3_f32(float* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t3_f64(double* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
|
||||
%i = getelementptr double* %p, i32 %q
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_global_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_global_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t4_global_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_global_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
|
||||
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_global_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
|
||||
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_const_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16;
|
||||
;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(1)* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_const_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32;
|
||||
;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(1)* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t4_const_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64;
|
||||
;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(1)* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_const_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float;
|
||||
;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
|
||||
%x = load float addrspace(1)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_const_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double;
|
||||
;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
|
||||
%x = load double addrspace(1)* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_shared_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
|
||||
;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(4)* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_shared_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
|
||||
;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(4)* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t4_shared_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
|
||||
;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(4)* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_shared_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
|
||||
;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||
%x = load float addrspace(4)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_shared_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
|
||||
;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||
%x = load double addrspace(4)* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t5_u16() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t5_u32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t5_u64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t5_f32() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
|
||||
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t5_f64() {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
|
||||
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8];
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
@ -1,6 +0,0 @@
|
||||
config.suffixes = ['.ll', '.c', '.cpp']
|
||||
|
||||
targets = set(config.root.targets_to_build.split())
|
||||
if not 'PTX' in targets:
|
||||
config.unsupported = True
|
||||
|
@ -1,56 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
|
||||
|
||||
define ptx_device float @test_sqrt_f32(float %x) {
|
||||
entry:
|
||||
; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = call float @llvm.sqrt.f32(float %x)
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define ptx_device double @test_sqrt_f64(double %x) {
|
||||
entry:
|
||||
; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = call double @llvm.sqrt.f64(double %x)
|
||||
ret double %y
|
||||
}
|
||||
|
||||
define ptx_device float @test_sin_f32(float %x) {
|
||||
entry:
|
||||
; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = call float @llvm.sin.f32(float %x)
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define ptx_device double @test_sin_f64(double %x) {
|
||||
entry:
|
||||
; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = call double @llvm.sin.f64(double %x)
|
||||
ret double %y
|
||||
}
|
||||
|
||||
define ptx_device float @test_cos_f32(float %x) {
|
||||
entry:
|
||||
; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = call float @llvm.cos.f32(float %x)
|
||||
ret float %y
|
||||
}
|
||||
|
||||
define ptx_device double @test_cos_f64(double %x) {
|
||||
entry:
|
||||
; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%y = call double @llvm.cos.f64(double %x)
|
||||
ret double %y
|
||||
}
|
||||
|
||||
declare float @llvm.sqrt.f32(float)
|
||||
declare double @llvm.sqrt.f64(double)
|
||||
declare float @llvm.sin.f32(float)
|
||||
declare double @llvm.sin.f64(double)
|
||||
declare float @llvm.cos.f32(float)
|
||||
declare double @llvm.cos.f64(double)
|
@ -1,24 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
|
||||
; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
|
||||
; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
|
||||
|
||||
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
|
||||
entry:
|
||||
; FMA: mad.rn.f32
|
||||
; MUL: mul.rn.f32
|
||||
; MUL: add.rn.f32
|
||||
%a = fmul float %x, %y
|
||||
%b = fadd float %a, %z
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
|
||||
entry:
|
||||
; FMA: mad.rn.f64
|
||||
; MUL: mul.rn.f64
|
||||
; MUL: add.rn.f64
|
||||
%a = fmul double %x, %y
|
||||
%b = fadd double %a, %z
|
||||
ret double %b
|
||||
}
|
@ -1,17 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y, float %z) {
|
||||
; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fmul float %x, %y
|
||||
%b = fadd float %a, %z
|
||||
ret float %b
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y, double %z) {
|
||||
; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%a = fmul double %x, %y
|
||||
%b = fadd double %a, %z
|
||||
ret double %b
|
||||
}
|
@ -1,62 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @t1_u16() {
|
||||
; CHECK: mov.u16 %ret{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i16 0
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32() {
|
||||
; CHECK: mov.u32 %ret{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64() {
|
||||
; CHECK: mov.u64 %ret{{[0-9]+}}, 0;
|
||||
; CHECK: ret;
|
||||
ret i64 0
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32() {
|
||||
; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000;
|
||||
; CHECK: ret;
|
||||
ret float 0.0
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64() {
|
||||
; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000;
|
||||
; CHECK: ret;
|
||||
ret double 0.0
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3_f32(float %x) {
|
||||
; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t3_f64(double %x) {
|
||||
; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
ret double %x
|
||||
}
|
||||
|
@ -1,39 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
;define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; %z = mul i32 %x, %y
|
||||
; ret i32 %z
|
||||
;}
|
||||
|
||||
;define ptx_device i32 @t2(i32 %x) {
|
||||
; %z = mul i32 %x, 1
|
||||
; ret i32 %z
|
||||
;}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret;
|
||||
%z = fmul float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
|
||||
; CHECK: ret;
|
||||
%z = fmul double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000;
|
||||
; CHECK: ret;
|
||||
%z = fmul float %x, 5.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000;
|
||||
; CHECK: ret;
|
||||
%z = fmul double %x, 5.0
|
||||
ret double %z
|
||||
}
|
@ -1,13 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=ptx20 | grep ".version 2.0"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=ptx21 | grep ".version 2.1"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=ptx22 | grep ".version 2.2"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".version 2.3"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm10 | grep ".target sm_10"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm13 | grep ".target sm_13"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | grep ".target sm_20"
|
||||
; RUN: llc < %s -march=ptx32 -mattr=ptx23 | grep ".address_size 32"
|
||||
; RUN: llc < %s -march=ptx64 -mattr=ptx23 | grep ".address_size 64"
|
||||
|
||||
define ptx_device void @t1() {
|
||||
ret void
|
||||
}
|
@ -1,8 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
|
||||
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
|
||||
; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
%result = sub i32 %b, %c
|
||||
ret i32 %result
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
|
||||
|
||||
declare i32 @printf(i8*, ...)
|
||||
|
||||
@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
|
||||
|
||||
define ptx_device void @t1_printf() {
|
||||
; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
|
||||
; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
|
||||
; CHECK: ret;
|
||||
%1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
|
||||
ret void
|
||||
}
|
||||
|
||||
@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
|
||||
|
||||
define ptx_device void @t2_printf() {
|
||||
; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
|
||||
; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
|
||||
; CHECK: cvta.local.u64 %rd{{[0-9]+}}, __local{{[0-9+]}};
|
||||
; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
|
||||
; CHECK: ret;
|
||||
%1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
|
||||
ret void
|
||||
}
|
@ -1,7 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device void @t1() {
|
||||
; CHECK: ret;
|
||||
; CHECK-NOT: exit;
|
||||
ret void
|
||||
}
|
@ -1,25 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}};
|
||||
%a = select i1 %x, i32 %y, i32 %z
|
||||
ret i32 %a
|
||||
}
|
||||
|
||||
define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
|
||||
; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}};
|
||||
%a = select i1 %x, i64 %y, i64 %z
|
||||
ret i64 %a
|
||||
}
|
||||
|
||||
define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
|
||||
; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}};
|
||||
%a = select i1 %x, float %y, float %z
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
|
||||
; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}};
|
||||
%a = select i1 %x, double %y, double %z
|
||||
ret double %a
|
||||
}
|
@ -1,206 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp eq i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ne i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ult i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ule i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ugt i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp uge i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp slt i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp sle i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp sgt i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
|
||||
; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp sge i32 %x, %y
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
|
||||
; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp eq i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
|
||||
; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ne i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
|
||||
; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ult i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
|
||||
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ule i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp ugt i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
|
||||
; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp uge i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
|
||||
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp slt i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
|
||||
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp sle i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
|
||||
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp sgt i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
|
||||
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
|
||||
; CHECK: ret;
|
||||
%p = icmp sge i32 %x, 1
|
||||
%z = zext i1 %p to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]];
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1;
|
||||
; CHECK: ret;
|
||||
%c = icmp eq i32 %x, %y
|
||||
%d = icmp ugt i32 %u, %v
|
||||
%e = and i1 %c, %d
|
||||
%z = zext i1 %e to i32
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
|
||||
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
|
||||
; CHECK: setp.eq.and.u32 %p1, %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]];
|
||||
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p1;
|
||||
; CHECK: ret;
|
||||
%c = trunc i32 %w to i1
|
||||
%d = icmp eq i32 %x, %y
|
||||
%e = xor i1 %c, 1
|
||||
%f = and i1 %d, %e
|
||||
%z = zext i1 %f to i32
|
||||
ret i32 %z
|
||||
}
|
@ -1,22 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
%z = shl i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
|
||||
%z = shl i32 %x, 3
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3(i32 %x) {
|
||||
; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
|
||||
%z = shl i32 3, %x
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
@ -1,43 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
%z = lshr i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
|
||||
%z = lshr i32 %x, 3
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3(i32 %x) {
|
||||
; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
|
||||
%z = lshr i32 3, %x
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4(i32 %x, i32 %y) {
|
||||
; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
|
||||
%z = ashr i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t5(i32 %x) {
|
||||
; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
|
||||
%z = ashr i32 %x, 3
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t6(i32 %x) {
|
||||
; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}}
|
||||
%z = ashr i32 -3, %x
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
@ -1,27 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
||||
|
||||
define ptx_device void @test_add(float %x, float %y) {
|
||||
; CHECK: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device float @test_call(float %x, float %y) {
|
||||
%a = fadd float %x, %y
|
||||
; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
|
||||
call void @test_add(float %a, float %y)
|
||||
ret float %a
|
||||
}
|
||||
|
||||
define ptx_device float @test_compute(float %x, float %y) {
|
||||
; CHECK: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device float @test_call_compute(float %x, float %y) {
|
||||
; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}})
|
||||
%z = call float @test_compute(float %x, float %y)
|
||||
ret float %z
|
||||
}
|
||||
|
@ -1,337 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .b16 array_i16[10];
|
||||
@array_i16 = external global [10 x i16]
|
||||
|
||||
;CHECK: .extern .const .b16 array_constant_i16[10];
|
||||
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||
|
||||
;CHECK: .extern .shared .b16 array_shared_i16[10];
|
||||
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||
|
||||
;CHECK: .extern .global .b32 array_i32[10];
|
||||
@array_i32 = external global [10 x i32]
|
||||
|
||||
;CHECK: .extern .const .b32 array_constant_i32[10];
|
||||
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||
|
||||
;CHECK: .extern .shared .b32 array_shared_i32[10];
|
||||
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .global .b64 array_i64[10];
|
||||
@array_i64 = external global [10 x i64]
|
||||
|
||||
;CHECK: .extern .const .b64 array_constant_i64[10];
|
||||
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||
|
||||
;CHECK: .extern .shared .b64 array_shared_i64[10];
|
||||
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||
|
||||
;CHECK: .extern .global .b32 array_float[10];
|
||||
@array_float = external global [10 x float]
|
||||
|
||||
;CHECK: .extern .const .b32 array_constant_float[10];
|
||||
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .b32 array_shared_float[10];
|
||||
@array_shared_float = external addrspace(4) global [10 x float]
|
||||
|
||||
;CHECK: .extern .global .b64 array_double[10];
|
||||
@array_double = external global [10 x double]
|
||||
|
||||
;CHECK: .extern .const .b64 array_constant_double[10];
|
||||
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||
|
||||
;CHECK: .extern .shared .b64 array_shared_double[10];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
|
||||
define ptx_device void @t1_u16(i16* %p, i16 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
store i16 %x, i16* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t1_u32(i32* %p, i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
store i32 %x, i32* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t1_u64(i64* %p, i64 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
store i64 %x, i64* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t1_f32(float* %p, float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
store float %x, float* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t1_f64(double* %p, double %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
store double %x, double* %p
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2_u16(i16* %p, i16 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i16* %p, i32 1
|
||||
store i16 %x, i16* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2_u32(i32* %p, i32 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i32* %p, i32 1
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2_u64(i64* %p, i64 %x) {
|
||||
entry:
|
||||
;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i64* %p, i32 1
|
||||
store i64 %x, i64* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2_f32(float* %p, float %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t2_f64(double* %p, double %x) {
|
||||
entry:
|
||||
;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr double* %p, i32 1
|
||||
store double %x, double* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i16* %p, i32 %q
|
||||
store i16 %x, i16* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i32* %p, i32 %q
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr i64* %p, i32 %q
|
||||
store i64 %x, i64* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
|
||||
entry:
|
||||
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
|
||||
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
|
||||
;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr double* %p, i32 %q
|
||||
store double %x, double* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
|
||||
store i16 %x, i16* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||
store i64 %x, i64* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
|
||||
;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_global_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
|
||||
;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||
store double %x, double* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
|
||||
;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||
store i16 %x, i16 addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
|
||||
;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||
store i32 %x, i32 addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
|
||||
;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||
store i64 %x, i64 addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
|
||||
;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||
store float %x, float addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t4_shared_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
|
||||
;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||
store double %x, double addrspace(4)* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5_u16(i16 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
|
||||
;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||
store i16 %x, i16* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5_u32(i32 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
|
||||
;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||
store i32 %x, i32* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5_u64(i64 %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
|
||||
;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||
store i64 %x, i64* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5_f32(float %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
|
||||
;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||
store float %x, float* %i
|
||||
ret void
|
||||
}
|
||||
|
||||
define ptx_device void @t5_f64(double %x) {
|
||||
entry:
|
||||
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
|
||||
;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}};
|
||||
;CHECK: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||
store double %x, double* %i
|
||||
ret void
|
||||
}
|
@ -1,19 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
|
||||
|
||||
define ptx_device float @stack1(float %a) {
|
||||
; CHECK: .local .align 4 .b8 __local0[4];
|
||||
%a.2 = alloca float, align 4
|
||||
; CHECK: st.local.f32 [__local0], %f0
|
||||
store float %a, float* %a.2
|
||||
%a.3 = load float* %a.2
|
||||
ret float %a.3
|
||||
}
|
||||
|
||||
define ptx_device float @stack1_align8(float %a) {
|
||||
; CHECK: .local .align 8 .b8 __local0[4];
|
||||
%a.2 = alloca float, align 8
|
||||
; CHECK: st.local.f32 [__local0], %f0
|
||||
store float %a, float* %a.2
|
||||
%a.3 = load float* %a.2
|
||||
ret float %a.3
|
||||
}
|
@ -1,71 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx32 | FileCheck %s
|
||||
|
||||
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||
; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%z = sub i16 %x, %y
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||
; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%z = sub i32 %x, %y
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%z = sub i64 %x, %y
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
|
||||
; CHECK: ret;
|
||||
%z = fsub float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
|
||||
; CHECK: ret;
|
||||
%z = fsub double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1;
|
||||
; CHECK: ret;
|
||||
%z = sub i16 %x, 1
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1;
|
||||
; CHECK: ret;
|
||||
%z = sub i32 %x, 1
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1;
|
||||
; CHECK: ret;
|
||||
%z = sub i64 %x, 1
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000;
|
||||
; CHECK: ret;
|
||||
%z = fsub float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000;
|
||||
; CHECK: ret;
|
||||
%z = fsub double %x, 1.0
|
||||
ret double %z
|
||||
}
|
@ -353,9 +353,9 @@ TEST(TripleTest, BitWidthArchVariants) {
|
||||
EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch());
|
||||
EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch());
|
||||
|
||||
T.setArch(Triple::ptx32);
|
||||
EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch());
|
||||
EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch());
|
||||
T.setArch(Triple::nvptx);
|
||||
EXPECT_EQ(Triple::nvptx, T.get32BitArchVariant().getArch());
|
||||
EXPECT_EQ(Triple::nvptx64, T.get64BitArchVariant().getArch());
|
||||
|
||||
T.setArch(Triple::sparc);
|
||||
EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch());
|
||||
@ -377,9 +377,9 @@ TEST(TripleTest, BitWidthArchVariants) {
|
||||
EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch());
|
||||
EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch());
|
||||
|
||||
T.setArch(Triple::ptx64);
|
||||
EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch());
|
||||
EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch());
|
||||
T.setArch(Triple::nvptx64);
|
||||
EXPECT_EQ(Triple::nvptx, T.get32BitArchVariant().getArch());
|
||||
EXPECT_EQ(Triple::nvptx64, T.get64BitArchVariant().getArch());
|
||||
|
||||
T.setArch(Triple::sparcv9);
|
||||
EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch());
|
||||
|
Loading…
Reference in New Issue
Block a user