Remove the PTX back-end and all of its artifacts (triple, etc.)

This back-end was deprecated in favor of the NVPTX back-end.

NV_CONTRIB

llvm-svn: 157417
This commit is contained in:
Justin Holewinski 2012-05-24 21:38:21 +00:00
parent 03e1ab9c5b
commit ee3de35dee
95 changed files with 97 additions and 9086 deletions

View File

@ -83,7 +83,6 @@ set(LLVM_ALL_TARGETS
MSP430
NVPTX
PowerPC
PTX
Sparc
X86
XCore

View File

@ -369,7 +369,6 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
msp430-*) llvm_cv_target_arch="MSP430" ;;
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac])
@ -403,7 +402,6 @@ case $host in
msp430-*) host_arch="MSP430" ;;
hexagon-*) host_arch="Hexagon" ;;
mblaze-*) host_arch="MBlaze" ;;
ptx-*) host_arch="PTX" ;;
*) host_arch="Unknown" ;;
esac
@ -542,7 +540,6 @@ else
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
esac
@ -655,13 +652,13 @@ TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, nvptx, and cpp (default=all)]),,
xcore, msp430, nvptx, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -677,7 +674,6 @@ case "$enableval" in
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -691,7 +687,6 @@ case "$enableval" in
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;

12
configure vendored
View File

@ -1419,7 +1419,7 @@ Optional Features:
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, nvptx, and cpp (default=all)
xcore, msp430, nvptx, and cpp (default=all)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
@ -3901,7 +3901,6 @@ else
msp430-*) llvm_cv_target_arch="MSP430" ;;
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac
@ -3935,7 +3934,6 @@ case $host in
msp430-*) host_arch="MSP430" ;;
hexagon-*) host_arch="Hexagon" ;;
mblaze-*) host_arch="MBlaze" ;;
ptx-*) host_arch="PTX" ;;
*) host_arch="Unknown" ;;
esac
@ -5147,8 +5145,6 @@ else
Hexagon) TARGET_HAS_JIT=0
;;
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
;;
NVPTX) TARGET_HAS_JIT=0
;;
@ -5336,7 +5332,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze PTX NVPTX Hexagon" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 CppBackend MBlaze NVPTX Hexagon" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5352,7 +5348,6 @@ case "$enableval" in
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5366,7 +5361,6 @@ case "$enableval" in
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
@ -10349,7 +10343,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
#line 10352 "configure"
#line 10346 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H

View File

@ -62,8 +62,6 @@ public:
x86_64, // X86-64: amd64, x86_64
xcore, // XCore: xcore
mblaze, // MBlaze: mblaze
ptx32, // PTX: ptx (32-bit)
ptx64, // PTX: ptx (64-bit)
nvptx, // NVPTX: 32-bit
nvptx64, // NVPTX: 64-bit
le32, // le32: generic little-endian 32-bit CPU (PNaCl / Emscripten)

View File

@ -445,6 +445,5 @@ include "llvm/IntrinsicsX86.td"
include "llvm/IntrinsicsARM.td"
include "llvm/IntrinsicsCellSPU.td"
include "llvm/IntrinsicsXCore.td"
include "llvm/IntrinsicsPTX.td"
include "llvm/IntrinsicsHexagon.td"
include "llvm/IntrinsicsNVVM.td"

View File

@ -870,3 +870,83 @@ def int_nvvm_compiler_error :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.error">;
def int_nvvm_compiler_warn :
Intrinsic<[], [llvm_anyptr_ty], [], "llvm.nvvm.compiler.warn">;
// Old PTX back-end intrinsics retained here for backwards-compatibility
// Defines the four per-component intrinsics (_x, _y, _z, _w) of a special
// register. Each one takes no arguments, returns an i32, is marked IntrNoMem,
// and is bound to the GCC builtin named `prefix` followed by the same suffix.
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
// FIXME: Do we need the 128-bit integer type version?
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
// FIXME: Enable this once v4i32 support is enabled in back-end.
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_x")>;
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_y")>;
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_z")>;
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_w")>;
}
// Argument-less, IntrNoMem intrinsic returning an i32 and bound to the GCC
// builtin `name`.
class PTXReadSpecialRegisterIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
// Same as the _r32 variant, but the result type is i64.
class PTXReadSpecialRegisterIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_tid">;
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_ntid">;
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_laneid">;
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_warpid">;
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_nwarpid">;
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_ctaid">;
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_nctaid">;
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_smid">;
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_nsmid">;
def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_gridid">;
def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_eq">;
def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_le">;
def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_lt">;
def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_ge">;
def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_gt">;
def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_clock">;
def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
<"__builtin_ptx_read_clock64">;
def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm0">;
def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm1">;
def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm2">;
def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm3">;
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
GCCBuiltin<"__builtin_ptx_bar_sync">;

View File

@ -1,92 +0,0 @@
//===- IntrinsicsPTX.td - Defines PTX intrinsics -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the PTX-specific intrinsics.
//
//===----------------------------------------------------------------------===//
// Helper (multi)classes for the special-register read intrinsics. Everything
// instantiated from them inside this block gets TargetPrefix = "ptx".
let TargetPrefix = "ptx" in {
// Four per-component intrinsics (_x, _y, _z, _w): no arguments, i32 result,
// IntrNoMem, bound to the GCC builtin `prefix` followed by the same suffix.
multiclass PTXReadSpecialRegisterIntrinsic_v4i32<string prefix> {
// FIXME: Do we need the 128-bit integer type version?
// def _r64 : Intrinsic<[llvm_i128_ty], [], [IntrNoMem]>;
// FIXME: Enable this once v4i32 support is enabled in back-end.
// def _v4i16 : Intrinsic<[llvm_v4i32_ty], [], [IntrNoMem]>;
def _x : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_x")>;
def _y : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_y")>;
def _z : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_z")>;
def _w : Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<!strconcat(prefix, "_w")>;
}
// Single i32-valued special register bound to the GCC builtin `name`.
class PTXReadSpecialRegisterIntrinsic_r32<string name>
: Intrinsic<[llvm_i32_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
// Single i64-valued special register bound to the GCC builtin `name`.
class PTXReadSpecialRegisterIntrinsic_r64<string name>
: Intrinsic<[llvm_i64_ty], [], [IntrNoMem]>,
GCCBuiltin<name>;
}
defm int_ptx_read_tid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_tid">;
defm int_ptx_read_ntid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_ntid">;
def int_ptx_read_laneid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_laneid">;
def int_ptx_read_warpid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_warpid">;
def int_ptx_read_nwarpid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_nwarpid">;
defm int_ptx_read_ctaid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_ctaid">;
defm int_ptx_read_nctaid : PTXReadSpecialRegisterIntrinsic_v4i32
<"__builtin_ptx_read_nctaid">;
def int_ptx_read_smid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_smid">;
def int_ptx_read_nsmid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_nsmid">;
def int_ptx_read_gridid : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_gridid">;
def int_ptx_read_lanemask_eq : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_eq">;
def int_ptx_read_lanemask_le : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_le">;
def int_ptx_read_lanemask_lt : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_lt">;
def int_ptx_read_lanemask_ge : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_ge">;
def int_ptx_read_lanemask_gt : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_lanemask_gt">;
def int_ptx_read_clock : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_clock">;
def int_ptx_read_clock64 : PTXReadSpecialRegisterIntrinsic_r64
<"__builtin_ptx_read_clock64">;
def int_ptx_read_pm0 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm0">;
def int_ptx_read_pm1 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm1">;
def int_ptx_read_pm2 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm2">;
def int_ptx_read_pm3 : PTXReadSpecialRegisterIntrinsic_r32
<"__builtin_ptx_read_pm3">;
let TargetPrefix = "ptx" in
def int_ptx_bar_sync : Intrinsic<[], [llvm_i32_ty], []>,
GCCBuiltin<"__builtin_ptx_bar_sync">;

View File

@ -38,8 +38,6 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case x86_64: return "x86_64";
case xcore: return "xcore";
case mblaze: return "mblaze";
case ptx32: return "ptx32";
case ptx64: return "ptx64";
case nvptx: return "nvptx";
case nvptx64: return "nvptx64";
case le32: return "le32";
@ -76,8 +74,6 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case xcore: return "xcore";
case ptx32: return "ptx";
case ptx64: return "ptx";
case nvptx: return "nvptx";
case nvptx64: return "nvptx";
case le32: return "le32";
@ -164,8 +160,6 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
.Case("x86", x86)
.Case("x86-64", x86_64)
.Case("xcore", xcore)
.Case("ptx32", ptx32)
.Case("ptx64", ptx64)
.Case("nvptx", nvptx)
.Case("nvptx64", nvptx64)
.Case("le32", le32)
@ -198,8 +192,6 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
.Cases("arm", "armv4t", "armv5", "armv6", Triple::arm)
.Cases("armv7", "armv7f", "armv7k", "armv7s", "xscale", Triple::arm)
.Case("r600", Triple::r600)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
.Case("amdil", Triple::amdil)
@ -223,8 +215,6 @@ const char *Triple::getArchNameForAssembler() {
.Cases("armv6", "thumbv6", "armv6")
.Cases("armv7", "thumbv7", "armv7")
.Case("r600", "r600")
.Case("ptx32", "ptx32")
.Case("ptx64", "ptx64")
.Case("nvptx", "nvptx")
.Case("nvptx64", "nvptx64")
.Case("le32", "le32")
@ -259,8 +249,6 @@ static Triple::ArchType parseArch(StringRef ArchName) {
.Case("sparcv9", Triple::sparcv9)
.Case("tce", Triple::tce)
.Case("xcore", Triple::xcore)
.Case("ptx32", Triple::ptx32)
.Case("ptx64", Triple::ptx64)
.Case("nvptx", Triple::nvptx)
.Case("nvptx64", Triple::nvptx64)
.Case("le32", Triple::le32)
@ -689,7 +677,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mipsel:
case llvm::Triple::nvptx:
case llvm::Triple::ppc:
case llvm::Triple::ptx32:
case llvm::Triple::r600:
case llvm::Triple::sparc:
case llvm::Triple::tce:
@ -702,7 +689,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) {
case llvm::Triple::mips64el:
case llvm::Triple::nvptx64:
case llvm::Triple::ppc64:
case llvm::Triple::ptx64:
case llvm::Triple::sparcv9:
case llvm::Triple::x86_64:
return 64;
@ -740,7 +726,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mipsel:
case Triple::nvptx:
case Triple::ppc:
case Triple::ptx32:
case Triple::r600:
case Triple::sparc:
case Triple::tce:
@ -754,7 +739,6 @@ Triple Triple::get32BitArchVariant() const {
case Triple::mips64el: T.setArch(Triple::mipsel); break;
case Triple::nvptx64: T.setArch(Triple::nvptx); break;
case Triple::ppc64: T.setArch(Triple::ppc); break;
case Triple::ptx64: T.setArch(Triple::ptx32); break;
case Triple::sparcv9: T.setArch(Triple::sparc); break;
case Triple::x86_64: T.setArch(Triple::x86); break;
}
@ -783,7 +767,6 @@ Triple Triple::get64BitArchVariant() const {
case Triple::mips64el:
case Triple::nvptx64:
case Triple::ppc64:
case Triple::ptx64:
case Triple::sparcv9:
case Triple::x86_64:
// Already 64-bit.
@ -793,7 +776,6 @@ Triple Triple::get64BitArchVariant() const {
case Triple::mipsel: T.setArch(Triple::mips64el); break;
case Triple::nvptx: T.setArch(Triple::nvptx64); break;
case Triple::ppc: T.setArch(Triple::ppc64); break;
case Triple::ptx32: T.setArch(Triple::ptx64); break;
case Triple::sparc: T.setArch(Triple::sparcv9); break;
case Triple::x86: T.setArch(Triple::x86_64); break;
}

View File

@ -16,7 +16,7 @@
;===------------------------------------------------------------------------===;
[common]
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PTX PowerPC Sparc X86 XCore
subdirectories = ARM CellSPU CppBackend Hexagon MBlaze MSP430 NVPTX Mips PowerPC Sparc X86 XCore
; This is a special group whose required libraries are extended (by llvm-build)
; with the best execution engine (the native JIT, if available, or the

View File

@ -1,32 +0,0 @@
# Build description for the PTX back-end's code generator library.
#
# PTX.td is the TableGen input; each tablegen() call below names one generated
# .inc file and the TableGen backend flag used to produce it.
# NOTE(review): tablegen(), add_public_tablegen_target() and add_llvm_target()
# are project macros defined outside this file — confirm their exact semantics
# there.
set(LLVM_TARGET_DEFINITIONS PTX.td)
tablegen(LLVM PTXGenAsmWriter.inc -gen-asm-writer)
tablegen(LLVM PTXGenDAGISel.inc -gen-dag-isel)
tablegen(LLVM PTXGenInstrInfo.inc -gen-instr-info)
tablegen(LLVM PTXGenRegisterInfo.inc -gen-register-info)
tablegen(LLVM PTXGenSubtargetInfo.inc -gen-subtarget)
# Target that the InstPrinter and MCTargetDesc libraries name in their
# add_dependencies() calls.
add_public_tablegen_target(PTXCommonTableGen)
# Sources of the PTXCodeGen library, listed explicitly.
add_llvm_target(PTXCodeGen
PTXAsmPrinter.cpp
PTXISelDAGToDAG.cpp
PTXISelLowering.cpp
PTXInstrInfo.cpp
PTXFPRoundingModePass.cpp
PTXFrameLowering.cpp
PTXMCAsmStreamer.cpp
PTXMCInstLower.cpp
PTXMFInfoExtract.cpp
PTXMachineFunctionInfo.cpp
PTXParamManager.cpp
PTXRegAlloc.cpp
PTXRegisterInfo.cpp
PTXSelectionDAGInfo.cpp
PTXSubtarget.cpp
PTXTargetMachine.cpp
)
# Component libraries built from their own subdirectories.
add_subdirectory(TargetInfo)
add_subdirectory(InstPrinter)
add_subdirectory(MCTargetDesc)

View File

@ -1,8 +0,0 @@
# Builds the PTX instruction printer library.
#
# The parent directory is added to the include path in both the binary and the
# source tree so that headers living one level up can be included from here.
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
add_llvm_library(LLVMPTXAsmPrinter
PTXInstPrinter.cpp
)
# Build-order dependency only: PTXCommonTableGen must be built before this
# library (PTXInstPrinter.cpp includes PTXGenAsmWriter.inc).
add_dependencies(LLVMPTXAsmPrinter PTXCommonTableGen)

View File

@ -1,23 +0,0 @@
;===- ./lib/Target/PTX/InstPrinter/LLVMBuild.txt ---------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = PTXAsmPrinter
parent = PTX
required_libraries = MC Support
add_to_library_groups = PTX

View File

@ -1,16 +0,0 @@
##===- lib/Target/PTX/InstPrinter/Makefile -----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMPTXAsmPrinter
# Hack: we need to include 'main' ptx target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@ -1,249 +0,0 @@
//===-- PTXInstPrinter.cpp - Convert PTX MCInst to assembly syntax --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class prints a PTX MCInst to a .ptx file.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "asm-printer"
#include "PTXInstPrinter.h"
#include "MCTargetDesc/PTXBaseInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#include "PTXGenAsmWriter.inc"
/// Constructs the printer: forwards \p MAI, \p MII and \p MRI to the
/// MCInstPrinter base class and records the feature bits of \p STI as the set
/// of available features.
PTXInstPrinter::PTXInstPrinter(const MCAsmInfo &MAI,
const MCInstrInfo &MII,
const MCRegisterInfo &MRI,
const MCSubtargetInfo &STI) :
MCInstPrinter(MAI, MII, MRI) {
// Initialize the set of available features.
setAvailableFeatures(STI.getFeatureBits());
}
/// Prints the textual name of the encoded register \p RegNo to \p OS: a '%',
/// a prefix chosen from the register space (and, for ordinary registers, the
/// register type), followed by the register offset.
void PTXInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const {
  // Encoding: bits 0-2 hold the space, bits 3-5 the type, the remaining high
  // bits the offset.
  const unsigned Space = RegNo & 0x7;
  const unsigned Type = (RegNo >> 3) & 0x7;
  const unsigned Offset = RegNo >> 6;

  OS << "%";

  // Select the name prefix; unknown spaces/types are programming errors.
  const char *Prefix = 0;
  switch (Space) {
  case PTXRegisterSpace::Return:
    Prefix = "ret";
    break;
  case PTXRegisterSpace::Argument:
    Prefix = "arg";
    break;
  case PTXRegisterSpace::Reg:
    switch (Type) {
    case PTXRegisterType::Pred:
      Prefix = "p";
      break;
    case PTXRegisterType::B16:
      Prefix = "rh";
      break;
    case PTXRegisterType::B32:
      Prefix = "r";
      break;
    case PTXRegisterType::B64:
      Prefix = "rd";
      break;
    case PTXRegisterType::F32:
      Prefix = "f";
      break;
    case PTXRegisterType::F64:
      Prefix = "fd";
      break;
    default:
      llvm_unreachable("Unknown register type!");
    }
    break;
  default:
    llvm_unreachable("Unknown register space!");
  }

  OS << Prefix;
  OS << Offset;
}
/// Prints one instruction: the guard predicate (if any), the instruction body,
/// a terminating ';', and finally the annotation \p Annot.
void PTXInstPrinter::printInst(const MCInst *MI, raw_ostream &O,
                               StringRef Annot) {
  printPredicate(MI, O);

  // Calls have a hand-written printer; everything else goes through the
  // TableGen-generated one.
  if (MI->getOpcode() == PTX::CALL)
    printCall(MI, O);
  else
    printInstruction(MI, O);

  O << ";";
  printAnnotation(O, Annot);
}
/// Prints the guard predicate of \p MI to \p O, or nothing when the
/// instruction is unpredicated (PTXPredicate::None).
///
/// PTX spells a guard as "@p" and a negated guard as "@!p", so the '@' is
/// always emitted and the '!' is added only for PTXPredicate::Negate.
void PTXInstPrinter::printPredicate(const MCInst *MI, raw_ostream &O) {
  // The predicate is a (register, kind) operand pair. It occupies the first
  // two operands of a CALL and the last two operands of every other
  // instruction.
  int RegIndex;
  int OpIndex;
  if (MI->getOpcode() == PTX::CALL) {
    RegIndex = 0;
    OpIndex = 1;
  } else {
    RegIndex = MI->getNumOperands()-2;
    OpIndex = MI->getNumOperands()-1;
  }

  int PredOp = MI->getOperand(OpIndex).getImm();
  if (PredOp == PTXPredicate::None)
    return;

  // Every guard starts with '@'; a bare "!p" is not valid PTX.
  O << '@';
  if (PredOp == PTXPredicate::Negate)
    O << '!';

  printOperand(MI, RegIndex, O);
}
/// Prints a CALL instruction as
///   "\tcall.uni\t" ["(" ret {", " ret} "), "] callee ", (" [arg {", " arg}] ")"
///
/// Operand layout read below, in order: two predicate operands, the number of
/// return values, the return values, the callee expression, the number of
/// arguments, the arguments. The operands are consumed strictly in that order
/// through Index++, so the statements must not be reordered.
void PTXInstPrinter::printCall(const MCInst *MI, raw_ostream &O) {
O << "\tcall.uni\t";
// The first two operands are the predicate slot
unsigned Index = 2;
unsigned NumRets = MI->getOperand(Index++).getImm();
// Return values are printed as a parenthesized, comma-separated list; the
// list is omitted entirely when there are none.
if (NumRets > 0) {
O << "(";
printOperand(MI, Index++, O);
for (unsigned i = 1; i < NumRets; ++i) {
O << ", ";
printOperand(MI, Index++, O);
}
O << "), ";
}
const MCExpr* Expr = MI->getOperand(Index++).getExpr();
unsigned NumArgs = MI->getOperand(Index++).getImm();
// if the function call is to printf or puts, change to vprintf
if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
const MCSymbol &Sym = SymRefExpr->getSymbol();
if (Sym.getName() == "printf" || Sym.getName() == "puts") {
O << "vprintf";
} else {
O << Sym.getName();
}
} else {
// Not a plain symbol reference: print the expression as-is.
O << *Expr;
}
// Unlike the return list, the argument parentheses are always printed, even
// when the list is empty.
O << ", (";
if (NumArgs > 0) {
printOperand(MI, Index++, O);
for (unsigned i = 1; i < NumArgs; ++i) {
O << ", ";
printOperand(MI, Index++, O);
}
}
O << ")";
}
void PTXInstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
if (Op.isImm()) {
O << Op.getImm();
} else if (Op.isFPImm()) {
double Imm = Op.getFPImm();
APFloat FPImm(Imm);
APInt FPIntImm = FPImm.bitcastToAPInt();
O << "0D";
// PTX requires us to output the full 64 bits, even if the number is zero
if (FPIntImm.getZExtValue() > 0) {
O << FPIntImm.toString(16, false);
} else {
O << "0000000000000000";
}
} else if (Op.isReg()) {
printRegName(O, Op.getReg());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
const MCExpr *Expr = Op.getExpr();
if (const MCSymbolRefExpr *SymRefExpr = dyn_cast<MCSymbolRefExpr>(Expr)) {
const MCSymbol &Sym = SymRefExpr->getSymbol();
O << Sym.getName();
} else {
O << *Op.getExpr();
}
}
}
void PTXInstPrinter::printMemOperand(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
// By definition, operand OpNo+1 is an i32imm
const MCOperand &Op2 = MI->getOperand(OpNo+1);
printOperand(MI, OpNo, O);
if (Op2.getImm() == 0)
return; // don't print "+0"
O << "+" << Op2.getImm();
}
void PTXInstPrinter::printRoundingMode(const MCInst *MI, unsigned OpNo,
raw_ostream &O) {
const MCOperand &Op = MI->getOperand(OpNo);
assert (Op.isImm() && "Rounding modes must be immediate values");
switch (Op.getImm()) {
default:
llvm_unreachable("Unknown rounding mode!");
case PTXRoundingMode::RndDefault:
llvm_unreachable("FP rounding-mode pass did not handle instruction!");
case PTXRoundingMode::RndNone:
// Do not print anything.
break;
case PTXRoundingMode::RndNearestEven:
O << ".rn";
break;
case PTXRoundingMode::RndTowardsZero:
O << ".rz";
break;
case PTXRoundingMode::RndNegInf:
O << ".rm";
break;
case PTXRoundingMode::RndPosInf:
O << ".rp";
break;
case PTXRoundingMode::RndApprox:
O << ".approx";
break;
case PTXRoundingMode::RndNearestEvenInt:
O << ".rni";
break;
case PTXRoundingMode::RndTowardsZeroInt:
O << ".rzi";
break;
case PTXRoundingMode::RndNegInfInt:
O << ".rmi";
break;
case PTXRoundingMode::RndPosInfInt:
O << ".rpi";
break;
}
}

View File

@ -1,45 +0,0 @@
//===- PTXInstPrinter.h - Convert PTX MCInst to assembly syntax -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This class prints a PTX MCInst to a .ptx file.
//
//===----------------------------------------------------------------------===//
#ifndef PTXINSTPRINTER_H
#define PTXINSTPRINTER_H
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCSubtargetInfo.h"
namespace llvm {
class MCOperand;
/// MCInstPrinter implementation that writes PTX MCInsts as PTX assembly text.
class PTXInstPrinter : public MCInstPrinter {
public:
/// Forwards \p MAI, \p MII and \p MRI to MCInstPrinter and records the
/// feature bits of \p STI as the available features.
PTXInstPrinter(const MCAsmInfo &MAI, const MCInstrInfo &MII,
const MCRegisterInfo &MRI, const MCSubtargetInfo &STI);
/// Prints the guard predicate, the instruction, a trailing ';' and \p Annot.
virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot);
/// Prints '%', a space/type-dependent prefix and the register offset decoded
/// from \p RegNo.
virtual void printRegName(raw_ostream &OS, unsigned RegNo) const;
// Autogenerated by tblgen.
void printInstruction(const MCInst *MI, raw_ostream &O);
static const char *getRegisterName(unsigned RegNo);
/// Prints the guard predicate of \p MI; prints nothing for PTXPredicate::None.
void printPredicate(const MCInst *MI, raw_ostream &O);
/// Prints a CALL instruction ("call.uni" with its return and argument lists).
void printCall(const MCInst *MI, raw_ostream &O);
/// Prints an immediate, floating-point immediate, register or expression.
void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
/// Prints operand \p OpNo followed by "+imm" when operand OpNo+1 is non-zero.
void printMemOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O);
/// Prints the rounding-mode suffix (".rn", ".rz", ...) for operand \p OpNo.
void printRoundingMode(const MCInst *MI, unsigned OpNo, raw_ostream &O);
};
}
#endif

View File

@ -1,32 +0,0 @@
;===- ./lib/Target/PTX/LLVMBuild.txt ---------------------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[common]
subdirectories = InstPrinter MCTargetDesc TargetInfo
[component_0]
type = TargetGroup
name = PTX
parent = Target
has_asmprinter = 1
[component_1]
type = Library
name = PTXCodeGen
parent = PTX
required_libraries = Analysis AsmPrinter CodeGen Core MC PTXDesc PTXInfo SelectionDAG Support Target TransformUtils
add_to_library_groups = PTX

View File

@ -1,6 +0,0 @@
# Builds the PTX MC target description library from the two sources below.
add_llvm_library(LLVMPTXDesc
PTXMCTargetDesc.cpp
PTXMCAsmInfo.cpp
)
# Build-order dependency only: PTXCommonTableGen must be built before this
# library (PTXMCTargetDesc.cpp includes PTXGenInstrInfo.inc).
add_dependencies(LLVMPTXDesc PTXCommonTableGen)

View File

@ -1,23 +0,0 @@
;===- ./lib/Target/PTX/MCTargetDesc/LLVMBuild.txt --------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
[component_0]
type = Library
name = PTXDesc
parent = PTX
required_libraries = MC PTXAsmPrinter PTXInfo Support
add_to_library_groups = PTX

View File

@ -1,16 +0,0 @@
##===- lib/Target/PTX/MCTargetDesc/Makefile ----------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
LEVEL = ../../../..
LIBRARYNAME = LLVMPTXDesc
# Hack: we need to include 'main' target directory to grab private headers
CPP.Flags += -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
include $(LEVEL)/Makefile.common

View File

@ -1,134 +0,0 @@
//===-- PTXBaseInfo.h - Top level definitions for PTX -------- --*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains small standalone helper functions and enum definitions for
// the PTX target useful for the compiler back-end and the MC libraries.
// As such, it deliberately does not include references to LLVM core
// code gen types, passes, etc..
//
//===----------------------------------------------------------------------===//
#ifndef PTXBASEINFO_H
#define PTXBASEINFO_H
#include "PTXMCTargetDesc.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
namespace llvm {
/// Numeric identifiers for the PTX state spaces.
/// NOTE(review): where these values are consumed is not visible in this file.
namespace PTXStateSpace {
enum {
Global = 0, // default to global state space
Constant = 1,
Local = 2,
Parameter = 3,
Shared = 4
};
} // namespace PTXStateSpace
/// Kinds of guard predicate stored in an instruction's predicate-kind operand.
namespace PTXPredicate {
enum {
Normal = 0, // guarded by the predicate register as-is
Negate = 1, // guarded by the negated predicate register
None = 2 // no guard predicate; nothing is printed for it
};
} // namespace PTXPredicate
/// Namespace to hold the rounding-mode flags. The trailing comment on each
/// value is the PTX suffix that corresponds to it.
namespace PTXRoundingMode {
// Instruction Flags
enum {
// Rounding Mode Flags
RndMask = 15, // NOTE(review): presumably the mask covering the mode field — confirm
RndDefault = 0, // ---
RndNone = 1, // <NONE>
RndNearestEven = 2, // .rn
RndTowardsZero = 3, // .rz
RndNegInf = 4, // .rm
RndPosInf = 5, // .rp
RndApprox = 6, // .approx
RndNearestEvenInt = 7, // .rni
RndTowardsZeroInt = 8, // .rzi
RndNegInfInt = 9, // .rmi
RndPosInfInt = 10 // .rpi
};
} // namespace PTXRoundingMode
/// Register types. decodeRegisterName() reads this value from bits 3-5 of an
/// encoded register number; the trailing comments give the name prefix it
/// prints for registers in the Reg space.
namespace PTXRegisterType {
// Register type encoded in MCOperands
enum {
Pred = 0, // "p"
B16, // "rh"
B32, // "r"
B64, // "rd"
F32, // "f"
F64 // "fd"
};
} // namespace PTXRegisterType
/// Register spaces. decodeRegisterName() reads this value from bits 0-2 of an
/// encoded register number. It only handles Reg, Argument and Return; Local
/// and Param reach its "Unknown register space!" unreachable.
namespace PTXRegisterSpace {
// Register space encoded in MCOperands
enum {
Reg = 0,
Local,
Param,
Argument,
Return
};
} // namespace PTXRegisterSpace
/// Writes the textual name of the encoded register \p EncodedReg to \p OS:
/// '%', then a prefix derived from the register space (and the register type
/// for the Reg space), then the register offset.
inline static void decodeRegisterName(raw_ostream &OS,
                                      unsigned EncodedReg) {
  // Bits 0-2: space, bits 3-5: type, remaining bits: offset.
  const unsigned Space = EncodedReg & 0x7;
  const unsigned Type = (EncodedReg >> 3) & 0x7;
  const unsigned Offset = EncodedReg >> 6;

  OS << "%";

  if (Space == PTXRegisterSpace::Return) {
    OS << "ret";
  } else if (Space == PTXRegisterSpace::Argument) {
    OS << "arg";
  } else if (Space == PTXRegisterSpace::Reg) {
    // Ordinary registers are named after their type.
    switch (Type) {
    case PTXRegisterType::Pred:
      OS << "p";
      break;
    case PTXRegisterType::B16:
      OS << "rh";
      break;
    case PTXRegisterType::B32:
      OS << "r";
      break;
    case PTXRegisterType::B64:
      OS << "rd";
      break;
    case PTXRegisterType::F32:
      OS << "f";
      break;
    case PTXRegisterType::F64:
      OS << "fd";
      break;
    default:
      llvm_unreachable("Unknown register type!");
    }
  } else {
    llvm_unreachable("Unknown register space!");
  }

  OS << Offset;
}
} // namespace llvm
#endif

View File

@ -1,37 +0,0 @@
//===-- PTXMCAsmInfo.cpp - PTX asm properties -----------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declarations of the PTXMCAsmInfo properties.
//
//===----------------------------------------------------------------------===//
#include "PTXMCAsmInfo.h"
#include "llvm/ADT/Triple.h"
using namespace llvm;
// Out-of-line definition of the class's virtual anchor() hook; it does
// nothing.  NOTE(review): presumably the usual idiom for giving the vtable a
// home in this translation unit -- confirm.
void PTXMCAsmInfo::anchor() {}
/// Configure the assembly-syntax properties used for PTX output.
///
/// \param T   Unused.
/// \param TT  Target triple string; only consulted to detect the ptx64
///            architecture.
PTXMCAsmInfo::PTXMCAsmInfo(const Target &T, const StringRef &TT) {
  // Only the 64-bit flavour overrides PointerSize; for any other triple the
  // value inherited from the MCAsmInfo base is left untouched.
  if (Triple(TT).getArch() == Triple::ptx64)
    PointerSize = 8;

  // Lexical conventions.
  CommentString       = "//";
  PrivateGlobalPrefix = "$L__";
  AllowPeriodsInName  = false;

  // Directives switched off for this target.
  HasSetDirective            = false;
  HasDotTypeDotSizeDirective = false;
  HasSingleParameterDotFile  = false;
}

View File

@ -1,30 +0,0 @@
//===-- PTXMCAsmInfo.h - PTX asm properties --------------------*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the declaration of the PTXMCAsmInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_MCASM_INFO_H
#define PTX_MCASM_INFO_H
#include "llvm/MC/MCAsmInfo.h"
namespace llvm {
// Forward declarations: both types are only used by reference below.
class Target;
class StringRef;
/// MCAsmInfo specialization carrying the PTX assembly-syntax settings.
class PTXMCAsmInfo : public MCAsmInfo {
// Defined out of line in PTXMCAsmInfo.cpp (empty body).
virtual void anchor();
public:
/// \param T   Target this info is created for.
/// \param TT  Target triple string.
explicit PTXMCAsmInfo(const Target &T, const StringRef &TT);
};
} // namespace llvm
#endif // PTX_MCASM_INFO_H

View File

@ -1,98 +0,0 @@
//===-- PTXMCTargetDesc.cpp - PTX Target Descriptions ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides PTX specific target descriptions.
//
//===----------------------------------------------------------------------===//
#include "PTXMCTargetDesc.h"
#include "PTXMCAsmInfo.h"
#include "InstPrinter/PTXInstPrinter.h"
#include "llvm/MC/MCCodeGenInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_INSTRINFO_MC_DESC
#include "PTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_MC_DESC
#include "PTXGenSubtargetInfo.inc"
#define GET_REGINFO_MC_DESC
#include "PTXGenRegisterInfo.inc"
using namespace llvm;
// Factory handed to the TargetRegistry: heap-allocates an MCInstrInfo and
// populates it through InitPTXMCInstrInfo().  The object is returned as a raw
// pointer and is not freed here.
static MCInstrInfo *createPTXMCInstrInfo() {
  MCInstrInfo *InstrInfo = new MCInstrInfo();
  InitPTXMCInstrInfo(InstrInfo);
  return InstrInfo;
}
// Factory handed to the TargetRegistry: heap-allocates an MCRegisterInfo and
// populates it through InitPTXMCRegisterInfo().  The triple \p TT is not
// consulted.
static MCRegisterInfo *createPTXMCRegisterInfo(StringRef TT) {
  MCRegisterInfo *RegInfo = new MCRegisterInfo();
  // PTX does not have a return address register, hence the 0.
  InitPTXMCRegisterInfo(RegInfo, 0);
  return RegInfo;
}
// Factory handed to the TargetRegistry: heap-allocates an MCSubtargetInfo and
// populates it from the triple, CPU name and feature string through
// InitPTXMCSubtargetInfo().
static MCSubtargetInfo *createPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
                                                 StringRef FS) {
  MCSubtargetInfo *SubtargetInfo = new MCSubtargetInfo();
  InitPTXMCSubtargetInfo(SubtargetInfo, TT, CPU, FS);
  return SubtargetInfo;
}
// Factory handed to the TargetRegistry: heap-allocates an MCCodeGenInfo and
// initializes it with the requested relocation model, code model and
// optimization level.  The triple \p TT is not consulted.
static MCCodeGenInfo *createPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
                                             CodeModel::Model CM,
                                             CodeGenOpt::Level OL) {
  MCCodeGenInfo *CodeGenInfo = new MCCodeGenInfo();
  CodeGenInfo->InitMCCodeGenInfo(RM, CM, OL);
  return CodeGenInfo;
}
// Factory handed to the TargetRegistry: builds the PTX instruction printer.
// Only syntax variant 0 exists; \p T is not consulted.
static MCInstPrinter *createPTXMCInstPrinter(const Target &T,
                                             unsigned SyntaxVariant,
                                             const MCAsmInfo &MAI,
                                             const MCInstrInfo &MII,
                                             const MCRegisterInfo &MRI,
                                             const MCSubtargetInfo &STI) {
  assert(SyntaxVariant == 0 && "We only have one syntax variant");
  MCInstPrinter *Printer = new PTXInstPrinter(MAI, MII, MRI, STI);
  return Printer;
}
extern "C" void LLVMInitializePTXTargetMC() {
// Register the MC asm info.
RegisterMCAsmInfo<PTXMCAsmInfo> X(ThePTX32Target);
RegisterMCAsmInfo<PTXMCAsmInfo> Y(ThePTX64Target);
// Register the MC codegen info.
TargetRegistry::RegisterMCCodeGenInfo(ThePTX32Target, createPTXMCCodeGenInfo);
TargetRegistry::RegisterMCCodeGenInfo(ThePTX64Target, createPTXMCCodeGenInfo);
// Register the MC instruction info.
TargetRegistry::RegisterMCInstrInfo(ThePTX32Target, createPTXMCInstrInfo);
TargetRegistry::RegisterMCInstrInfo(ThePTX64Target, createPTXMCInstrInfo);
// Register the MC register info.
TargetRegistry::RegisterMCRegInfo(ThePTX32Target, createPTXMCRegisterInfo);
TargetRegistry::RegisterMCRegInfo(ThePTX64Target, createPTXMCRegisterInfo);
// Register the MC subtarget info.
TargetRegistry::RegisterMCSubtargetInfo(ThePTX32Target,
createPTXMCSubtargetInfo);
TargetRegistry::RegisterMCSubtargetInfo(ThePTX64Target,
createPTXMCSubtargetInfo);
// Register the MCInstPrinter.
TargetRegistry::RegisterMCInstPrinter(ThePTX32Target, createPTXMCInstPrinter);
TargetRegistry::RegisterMCInstPrinter(ThePTX64Target, createPTXMCInstPrinter);
}

View File

@ -1,36 +0,0 @@
//===-- PTXMCTargetDesc.h - PTX Target Descriptions ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file provides PTX specific target descriptions.
//
//===----------------------------------------------------------------------===//
#ifndef PTXMCTARGETDESC_H
#define PTXMCTARGETDESC_H
namespace llvm {
class Target;
// Target objects for the two PTX flavours.  They are only declared here;
// the definitions live in another translation unit.
extern Target ThePTX32Target;
extern Target ThePTX64Target;
} // End llvm namespace
// Defines symbolic names for PTX registers.
#define GET_REGINFO_ENUM
#include "PTXGenRegisterInfo.inc"
// Defines symbolic names for the PTX instructions.
#define GET_INSTRINFO_ENUM
#include "PTXGenInstrInfo.inc"
#define GET_SUBTARGETINFO_ENUM
#include "PTXGenSubtargetInfo.inc"
#endif

View File

@ -1,23 +0,0 @@
##===- lib/Target/PTX/Makefile -----------------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
# Relative path to the directory that holds Makefile.common (included below).
LEVEL = ../../..
# Library produced from this directory, and the target it belongs to.
LIBRARYNAME = LLVMPTXCodeGen
TARGET = PTX
# Make sure that tblgen is run, first thing.
BUILT_SOURCES = PTXGenAsmWriter.inc \
PTXGenDAGISel.inc \
PTXGenInstrInfo.inc \
PTXGenRegisterInfo.inc \
PTXGenSubtargetInfo.inc
# Sub-directories that are part of this target's build.
DIRS = InstPrinter TargetInfo MCTargetDesc
# Pull in the shared build rules.
include $(LEVEL)/Makefile.common

View File

@ -1,43 +0,0 @@
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the entry points for global functions defined in the LLVM
// PTX back-end.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_H
#define PTX_H
#include "MCTargetDesc/PTXBaseInfo.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
// Forward declarations for the types used in the signatures below.
class MachineInstr;
class MCInst;
class PTXAsmPrinter;
class PTXTargetMachine;
class FunctionPass;
// Pass factories.  Each returns a newly created pass for the given target
// machine; the pass implementations live in their own source files.
FunctionPass *createPTXISelDag(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createPTXFPRoundingModePass(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel);
FunctionPass *createPTXRegisterAllocator();
// Converts the MachineInstr MI into the MCInst OutMI on behalf of the given
// assembly printer.
void LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
PTXAsmPrinter &AP);
} // namespace llvm
#endif // PTX_H

View File

@ -1,141 +0,0 @@
//===-- PTX.td - Describe the PTX Target Machine -----------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// This is the top level entry point for the PTX target.
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Target-independent interfaces
//===----------------------------------------------------------------------===//
include "llvm/Target/Target.td"
//===----------------------------------------------------------------------===//
// Subtarget Features
//===----------------------------------------------------------------------===//
//===- Architectural Features ---------------------------------------------===//
// "double": sets SupportsDouble to true, so .f64 is not demoted to .f32.
def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
"Do not demote .f64 to .f32">;
// "no-fma": sets SupportsFMA to false, disabling fused multiply-add.
def FeatureNoFMA : SubtargetFeature<"no-fma","SupportsFMA", "false",
"Disable Fused-Multiply Add">;
//===- PTX Version --------------------------------------------------------===//
// One feature per supported PTX language version.  All of them assign to the
// same PTXVersion field, each with a different PTX_VERSION_* value.
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
"Use PTX Language Version 2.0">;
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
"Use PTX Language Version 2.1">;
def FeaturePTX22 : SubtargetFeature<"ptx22", "PTXVersion", "PTX_VERSION_2_2",
"Use PTX Language Version 2.2">;
def FeaturePTX23 : SubtargetFeature<"ptx23", "PTXVersion", "PTX_VERSION_2_3",
"Use PTX Language Version 2.3">;
//===- PTX Target ---------------------------------------------------------===//
// One feature per code-generation target.  All of them assign to the same
// PTXTarget field.  The shader-model (sm) features come first.
def FeatureSM10 : SubtargetFeature<"sm10", "PTXTarget", "PTX_SM_1_0",
"Use Shader Model 1.0">;
def FeatureSM11 : SubtargetFeature<"sm11", "PTXTarget", "PTX_SM_1_1",
"Use Shader Model 1.1">;
def FeatureSM12 : SubtargetFeature<"sm12", "PTXTarget", "PTX_SM_1_2",
"Use Shader Model 1.2">;
def FeatureSM13 : SubtargetFeature<"sm13", "PTXTarget", "PTX_SM_1_3",
"Use Shader Model 1.3">;
// The 2.x shader models additionally list FeatureDouble.
def FeatureSM20 : SubtargetFeature<"sm20", "PTXTarget", "PTX_SM_2_0",
"Use Shader Model 2.0", [FeatureDouble]>;
def FeatureSM21 : SubtargetFeature<"sm21", "PTXTarget", "PTX_SM_2_1",
"Use Shader Model 2.1", [FeatureDouble]>;
def FeatureSM22 : SubtargetFeature<"sm22", "PTXTarget", "PTX_SM_2_2",
"Use Shader Model 2.2", [FeatureDouble]>;
def FeatureSM23 : SubtargetFeature<"sm23", "PTXTarget", "PTX_SM_2_3",
"Use Shader Model 2.3", [FeatureDouble]>;
// Compute-compatibility features.
def FeatureCOMPUTE10 : SubtargetFeature<"compute10", "PTXTarget",
"PTX_COMPUTE_1_0",
"Use Compute Compatibility 1.0">;
def FeatureCOMPUTE11 : SubtargetFeature<"compute11", "PTXTarget",
"PTX_COMPUTE_1_1",
"Use Compute Compatibility 1.1">;
def FeatureCOMPUTE12 : SubtargetFeature<"compute12", "PTXTarget",
"PTX_COMPUTE_1_2",
"Use Compute Compatibility 1.2">;
def FeatureCOMPUTE13 : SubtargetFeature<"compute13", "PTXTarget",
"PTX_COMPUTE_1_3",
"Use Compute Compatibility 1.3">;
// Compute 2.0 additionally lists FeatureDouble, like the 2.x shader models.
def FeatureCOMPUTE20 : SubtargetFeature<"compute20", "PTXTarget",
"PTX_COMPUTE_2_0",
"Use Compute Compatibility 2.0",
[FeatureDouble]>;
//===----------------------------------------------------------------------===//
// PTX supported processors
//===----------------------------------------------------------------------===//
// Shorthand for a Processor record that uses NoItineraries, so each processor
// below only has to give its name and feature list.
class Proc<string Name, list<SubtargetFeature> Features>
: Processor<Name, NoItineraries, Features>;
// "generic" enables no features.
def : Proc<"generic", []>;
// Processor definitions for compute/shader models
def : Proc<"compute_10", [FeatureCOMPUTE10]>;
def : Proc<"compute_11", [FeatureCOMPUTE11]>;
def : Proc<"compute_12", [FeatureCOMPUTE12]>;
def : Proc<"compute_13", [FeatureCOMPUTE13]>;
def : Proc<"compute_20", [FeatureCOMPUTE20]>;
def : Proc<"sm_10", [FeatureSM10]>;
def : Proc<"sm_11", [FeatureSM11]>;
def : Proc<"sm_12", [FeatureSM12]>;
def : Proc<"sm_13", [FeatureSM13]>;
def : Proc<"sm_20", [FeatureSM20]>;
def : Proc<"sm_21", [FeatureSM21]>;
def : Proc<"sm_22", [FeatureSM22]>;
def : Proc<"sm_23", [FeatureSM23]>;
// Processor definitions for common GPU architectures
def : Proc<"g80", [FeatureSM10]>;
def : Proc<"gt200", [FeatureSM13]>;
// gf100 and fermi name FeatureDouble explicitly even though FeatureSM20
// already lists it.
def : Proc<"gf100", [FeatureSM20, FeatureDouble]>;
def : Proc<"fermi", [FeatureSM20, FeatureDouble]>;
//===----------------------------------------------------------------------===//
// Register File Description
//===----------------------------------------------------------------------===//
include "PTXRegisterInfo.td"
//===----------------------------------------------------------------------===//
// Instruction Descriptions
//===----------------------------------------------------------------------===//
include "PTXInstrInfo.td"
// Instruction-set record referenced by the PTX target definition; it adds
// nothing to the InstrInfo base.
def PTXInstrInfo : InstrInfo;
//===----------------------------------------------------------------------===//
// Assembly printer
//===----------------------------------------------------------------------===//
// PTX uses the MC printer for asm output, so make sure the TableGen
// AsmWriter bits get associated with the correct class.
def PTXAsmWriter : AsmWriter {
// Class-name suffix for the generated writer, and the flag marking it as an
// MC-based writer.
string AsmWriterClassName = "InstPrinter";
bit isMCAsmWriter = 1;
}
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
// Top-level target record: ties together the instruction set and the
// assembly writer defined in this file.
def PTX : Target {
let InstructionSet = PTXInstrInfo;
let AssemblyWriters = [PTXAsmWriter];
}

View File

@ -1,561 +0,0 @@
//===-- PTXAsmPrinter.cpp - PTX LLVM assembly writer ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a printer that converts from our internal representation
// of machine-dependent LLVM code to PTX assembly language.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-asm-printer"
#include "PTXAsmPrinter.h"
#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXParamManager.h"
#include "PTXRegisterInfo.h"
#include "PTXTargetMachine.h"
#include "llvm/Argument.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/DebugInfo.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/Mangler.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// Name prefixes for parameter and return-value symbols.
// NOTE(review): no function visible in this file references these constants;
// EmitFunctionEntryLabel() and EmitFunctionDeclaration() spell "__param_"
// out as a literal instead -- confirm whether they are still needed.
static const char PARAM_PREFIX[] = "__param_";
static const char RETURN_PREFIX[] = "__ret_";
/// Map a PTXRegisterType value to the PTX type keyword used in a register
/// declaration (".pred", ".b16", ...).  Any other value is a programming
/// error and reaches llvm_unreachable.
static const char *getRegisterTypeName(unsigned RegType) {
  if (RegType == PTXRegisterType::Pred)
    return ".pred";
  if (RegType == PTXRegisterType::B16)
    return ".b16";
  if (RegType == PTXRegisterType::B32)
    return ".b32";
  if (RegType == PTXRegisterType::B64)
    return ".b64";
  if (RegType == PTXRegisterType::F32)
    return ".f32";
  if (RegType == PTXRegisterType::F64)
    return ".f64";
  llvm_unreachable("Unknown register type");
}
/// Map a PTXStateSpace value to the state-space keyword without its leading
/// dot (the caller prints the dot).  Any other value reaches
/// llvm_unreachable.
static const char *getStateSpaceName(unsigned addressSpace) {
  const char *Name = 0;
  switch (addressSpace) {
  case PTXStateSpace::Global:
    Name = "global";
    break;
  case PTXStateSpace::Constant:
    Name = "const";
    break;
  case PTXStateSpace::Local:
    Name = "local";
    break;
  case PTXStateSpace::Parameter:
    Name = "param";
    break;
  case PTXStateSpace::Shared:
    Name = "shared";
    break;
  default:
    llvm_unreachable("Unknown state space");
  }
  return Name;
}
/// Return the PTX type keyword for an LLVM type.
///
/// Array and pointer types are peeled down to their element type first, so
/// e.g. a pointer to an array of float yields ".f32".  Supported leaf types
/// are float, double and 16/32/64-bit integers; anything else reaches
/// llvm_unreachable.
static const char *getTypeName(Type* type) {
  // Strip array/pointer wrappers.  The type is known to be a SequentialType
  // here, so use cast<> (which asserts) rather than dereferencing an
  // unchecked dyn_cast<> result.
  while (type->isArrayTy() || type->isPointerTy())
    type = cast<SequentialType>(type)->getElementType();

  switch (type->getTypeID()) {
  default:
    llvm_unreachable("Unknown type");
  case Type::FloatTyID:
    return ".f32";
  case Type::DoubleTyID:
    return ".f64";
  case Type::IntegerTyID:
    switch (type->getPrimitiveSizeInBits()) {
    default:
      llvm_unreachable("Unknown integer bit-width");
    case 16:
      return ".u16";
    case 32:
      return ".u32";
    case 64:
      return ".u64";
    }
  }
}
/// Run the base-class finalization with the module's global-variable list
/// temporarily emptied, then put the globals back in their original order.
///
/// \returns whatever AsmPrinter::doFinalization() returned.
bool PTXAsmPrinter::doFinalization(Module &M) {
  // XXX Temporarily remove global variables so that doFinalization() will not
  // emit them again (global variables are emitted at beginning).
  Module::GlobalListType &Globals = M.getGlobalList();
  const int NumGlobals = Globals.size();
  GlobalVariable **Saved = new GlobalVariable* [NumGlobals];

  // Step 1: remember every global, in list order.
  int Next = 0;
  for (Module::global_iterator GI = Globals.begin(), GE = Globals.end();
       GI != GE; ++GI) {
    Saved[Next] = &*GI;
    ++Next;
  }

  // Step 2: take them all out of the list; the saved pointers are
  // re-inserted below.
  while (!Globals.empty())
    Globals.remove(Globals.begin());

  // Step 3: let the generic printer finalize a module without globals.
  const bool Result = AsmPrinter::doFinalization(M);

  // Step 4: append the globals again, preserving their original order.
  for (int Idx = 0; Idx < NumGlobals; ++Idx)
    Globals.insert(Globals.end(), Saved[Idx]);

  delete[] Saved;
  return Result;
}
/// Emit the module prologue: the .version/.target (and optionally
/// .address_size) directives, a .file directive per debug-info compile unit,
/// then the declarations for external functions and global variables.
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
{
  const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();

  // PTX .version and .target attributes.  Targets without double support
  // get the map_f64_to_f32 option appended to .target.
  OutStreamer.EmitRawText(Twine("\t.version ") + ST.getPTXVersionString());
  const char *F64Option = ST.supportsDouble() ? "" : ", map_f64_to_f32";
  OutStreamer.EmitRawText(Twine("\t.target ") + ST.getTargetString() +
                          F64Option);

  // .address_size directive is optional, but it must immediately follow
  // the .target directive if present within a module
  if (ST.supportsPTX23())
    OutStreamer.EmitRawText(Twine("\t.address_size ") +
                            (ST.is64Bit() ? "64" : "32"));

  OutStreamer.AddBlankLine();

  // Register every compile unit's source file; GetOrCreateSourceID() emits
  // the .file directive the first time it sees a file.
  DebugInfoFinder Finder;
  Finder.processModule(M);
  for (DebugInfoFinder::iterator CU = Finder.compile_unit_begin(),
         CUEnd = Finder.compile_unit_end(); CU != CUEnd; ++CU) {
    DICompileUnit Unit(*CU);
    StringRef File = Unit.getFilename();
    StringRef Directory = Unit.getDirectory();
    GetOrCreateSourceID(File, Directory);
  }

  OutStreamer.AddBlankLine();

  // declare external functions
  for (Module::const_iterator FI = M.begin(), FE = M.end(); FI != FE; ++FI)
    EmitFunctionDeclaration(FI);

  // declare global variables
  for (Module::const_global_iterator GI = M.global_begin(),
         GE = M.global_end(); GI != GE; ++GI)
    EmitVariableDeclaration(GI);
}
void PTXAsmPrinter::EmitFunctionBodyStart() {
OutStreamer.EmitRawText(Twine("{"));
const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
const PTXParamManager &PM = MFI->getParamManager();
// Print register definitions
SmallString<128> regDefs;
raw_svector_ostream os(regDefs);
unsigned numRegs;
// pred
numRegs = MFI->countRegisters(PTXRegisterType::Pred, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .pred %p<" << numRegs << ">;\n";
// i16
numRegs = MFI->countRegisters(PTXRegisterType::B16, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b16 %rh<" << numRegs << ">;\n";
// i32
numRegs = MFI->countRegisters(PTXRegisterType::B32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b32 %r<" << numRegs << ">;\n";
// i64
numRegs = MFI->countRegisters(PTXRegisterType::B64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .b64 %rd<" << numRegs << ">;\n";
// f32
numRegs = MFI->countRegisters(PTXRegisterType::F32, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f32 %f<" << numRegs << ">;\n";
// f64
numRegs = MFI->countRegisters(PTXRegisterType::F64, PTXRegisterSpace::Reg);
if(numRegs > 0)
os << "\t.reg .f64 %fd<" << numRegs << ">;\n";
// Local params
for (PTXParamManager::param_iterator i = PM.local_begin(), e = PM.local_end();
i != e; ++i)
os << "\t.param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i)
<< ";\n";
OutStreamer.EmitRawText(os.str());
const MachineFrameInfo* FrameInfo = MF->getFrameInfo();
DEBUG(dbgs() << "Have " << FrameInfo->getNumObjects()
<< " frame object(s)\n");
for (unsigned i = 0, e = FrameInfo->getNumObjects(); i != e; ++i) {
DEBUG(dbgs() << "Size of object: " << FrameInfo->getObjectSize(i) << "\n");
if (FrameInfo->getObjectSize(i) > 0) {
OutStreamer.EmitRawText("\t.local .align " +
Twine(FrameInfo->getObjectAlignment(i)) +
" .b8 __local" +
Twine(i) +
"[" +
Twine(FrameInfo->getObjectSize(i)) +
"];");
}
}
//unsigned Index = 1;
// Print parameter passing params
//for (PTXMachineFunctionInfo::param_iterator
// i = MFI->paramBegin(), e = MFI->paramEnd(); i != e; ++i) {
// std::string def = "\t.param .b";
// def += utostr(*i);
// def += " __ret_";
// def += utostr(Index);
// Index++;
// def += ";";
// OutStreamer.EmitRawText(Twine(def));
//}
}
void PTXAsmPrinter::EmitFunctionBodyEnd() {
OutStreamer.EmitRawText(Twine("}"));
}
void PTXAsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInst TmpInst;
LowerPTXMachineInstrToMCInst(MI, TmpInst, *this);
OutStreamer.EmitInstruction(TmpInst);
}
/// Emit the PTX declaration for one global variable.
///
/// The declaration has the form
///   [.extern] .<space> [.align N] .b<bits> <name>[<count>] [= {...}];
/// Globals whose value type is a (possibly nested) array are flattened to a
/// one-dimensional array of the innermost element type.  Every other value
/// type is emitted as an unsized byte array.  A ConstantArray initializer is
/// written as a comma-separated list of hexadecimal values; its elements are
/// expected to be ConstantInts.
void PTXAsmPrinter::EmitVariableDeclaration(const GlobalVariable *gv) {
  // Check to see if this is a special global used by LLVM, if so, emit it.
  if (EmitSpecialLLVMGlobal(gv))
    return;

  MCSymbol *gvsym = Mang->getSymbol(gv);

  assert(gvsym->isUndefined() && "Cannot define a symbol twice!");

  SmallString<128> decl;
  raw_svector_ostream os(decl);

  // check if it is defined in some other translation unit
  if (gv->isDeclaration())
    os << ".extern ";

  // state space: e.g., .global
  os << '.' << getStateSpaceName(gv->getType()->getAddressSpace()) << ' ';

  // alignment (optional)
  unsigned alignment = gv->getAlignment();
  if (alignment != 0)
    os << ".align " << alignment << ' ';

  if (PointerType *pointerTy = dyn_cast<PointerType>(gv->getType())) {
    Type *elementTy = pointerTy->getElementType();

    if (elementTy->isArrayTy()) {
      // Flatten nested arrays: multiply the dimensions together and descend
      // to the innermost element type.
      ArrayType *arrayTy = cast<ArrayType>(elementTy);
      unsigned numElements = arrayTy->getNumElements();
      elementTy = arrayTy->getElementType();

      while (elementTy->isArrayTy()) {
        arrayTy = cast<ArrayType>(elementTy);
        numElements *= arrayTy->getNumElements();
        elementTy = arrayTy->getElementType();
      }

      // FIXME: isPrimitiveType() == false for i16?
      assert(elementTy->isSingleValueType() &&
             "Non-primitive types are not handled");

      // Find the size of the element in bits
      unsigned elementSize = elementTy->getPrimitiveSizeInBits();

      os << ".b" << elementSize << ' ' << gvsym->getName()
         << '[' << numElements << ']';
    } else {
      // The type keyword and the symbol name must be separated by a space;
      // without it the output reads ".b8name[]".
      os << ".b8 " << gvsym->getName() << "[]";
    }

    // handle string constants (assume ConstantArray means string)
    if (gv->hasInitializer()) {
      const Constant *C = gv->getInitializer();
      if (const ConstantArray *CA = dyn_cast<ConstantArray>(C)) {
        os << " = {";

        for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) {
          if (i > 0)
            os << ',';

          os << "0x";
          os.write_hex(cast<ConstantInt>(CA->getOperand(i))->getZExtValue());
        }

        os << '}';
      }
    }
  } else {
    // Note: this is currently the fall-through case and most likely generates
    // incorrect code.
    os << getTypeName(gv->getType()) << ' ' << gvsym->getName();

    if (isa<ArrayType>(gv->getType()) || isa<PointerType>(gv->getType()))
      os << "[]";
  }

  os << ';';

  OutStreamer.EmitRawText(os.str());
  OutStreamer.AddBlankLine();
}
/// Emit the function header line:
///   .entry <name> (<params>)                  for kernels
///   .func (<return values>) <name> (<params>) for device functions
///
/// Kernels, and device functions on subtargets that use the param space for
/// device arguments, describe each IR argument as ".param .b<bits>
/// __param_<n>" (n counts from 1).  Other device functions list their
/// argument registers as ".reg <type> <name>".  Return values follow the
/// same param-vs-register split.
void PTXAsmPrinter::EmitFunctionEntryLabel() {
  // The function label could have already been emitted if two symbols end up
  // conflicting due to asm renaming.  Detect this and emit an error.
  if (!CurrentFnSym->isUndefined())
    report_fatal_error("'" + Twine(CurrentFnSym->getName()) +
                       "' label emitted multiple times to assembly file");

  const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();
  const PTXParamManager &PM = MFI->getParamManager();
  const bool isKernel = MFI->isKernel();
  const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();

  SmallString<128> decl;
  raw_svector_ostream os(decl);
  os << (isKernel ? ".entry" : ".func");

  // Only device functions have a return-value list.
  if (!isKernel) {
    os << " (";
    if (ST.useParamSpaceForDeviceArgs()) {
      for (PTXParamManager::param_iterator i = PM.ret_begin(), e = PM.ret_end(),
           b = i; i != e; ++i) {
        if (i != b)
          os << ", ";

        os << ".param .b" << PM.getParamSize(*i) << ' ' << PM.getParamName(*i);
      }
    } else {
      for (PTXMachineFunctionInfo::reg_iterator
           i = MFI->retreg_begin(), e = MFI->retreg_end(), b = i;
           i != e; ++i) {
        if (i != b)
          os << ", ";

        os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
           << MFI->getRegisterName(*i);
      }
    }
    os << ')';
  }

  // Print function name
  os << ' ' << CurrentFnSym->getName() << " (";

  const Function *F = MF->getFunction();

  // Print parameters
  if (isKernel || ST.useParamSpaceForDeviceArgs()) {
    int Counter = 1;
    for (Function::const_arg_iterator i = F->arg_begin(), e = F->arg_end(),
         b = i; i != e; ++i) {
      if (i != b)
        os << ", ";

      const Type *ArgType = (*i).getType();
      const bool isPointer = ArgType->isPointerTy();

      // Pointers take the subtarget's address width; everything else uses
      // its primitive size.
      os << ".param .b";
      if (isPointer)
        os << (ST.is64Bit() ? "64" : "32");
      else
        os << ArgType->getPrimitiveSizeInBits();

      if (isPointer && ST.emitPtrAttribute()) {
        // ArgType is known to be a pointer here, so cast<> (which asserts)
        // is used rather than dereferencing an unchecked dyn_cast<> result.
        const PointerType *PtrType = cast<PointerType>(ArgType);
        os << " .ptr";
        switch (PtrType->getAddressSpace()) {
        default:
          llvm_unreachable("Unknown address space in argument");
        case PTXStateSpace::Global:
          os << " .global";
          break;
        case PTXStateSpace::Shared:
          os << " .shared";
          break;
        }
      }

      os << " __param_" << Counter++;
    }
  } else {
    for (PTXMachineFunctionInfo::reg_iterator
         i = MFI->argreg_begin(), e = MFI->argreg_end(), b = i;
         i != e; ++i) {
      if (i != b)
        os << ", ";

      os << ".reg " << getRegisterTypeName(MFI->getRegisterType(*i)) << ' '
         << MFI->getRegisterName(*i);
    }
  }

  os << ')';

  OutStreamer.EmitRawText(os.str());
}
void PTXAsmPrinter::EmitFunctionDeclaration(const Function* func)
{
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
std::string decl = "";
// hard-coded emission of extern vprintf function
if (func->getName() == "printf" || func->getName() == "puts") {
decl += ".extern .func (.param .b32 __param_1) vprintf (.param .b";
if (ST.is64Bit())
decl += "64";
else
decl += "32";
decl += " __param_2, .param .b";
if (ST.is64Bit())
decl += "64";
else
decl += "32";
decl += " __param_3)\n";
}
OutStreamer.EmitRawText(Twine(decl));
}
/// Return the numeric id associated with a source file, assigning a new one
/// (and emitting the matching .file directive) the first time the file is
/// seen.
///
/// \param FileName  File name; empty means "<stdin>".
/// \param DirName   Directory prepended to a relative \p FileName.
/// \returns the file's id; ids start at 1.
unsigned PTXAsmPrinter::GetOrCreateSourceID(StringRef FileName,
                                            StringRef DirName) {
  // If FE did not provide a file name, then assume stdin.
  if (FileName.empty())
    return GetOrCreateSourceID("<stdin>", StringRef());

  // MCStream expects full path name as filename.
  const bool NeedsDirectory =
    !DirName.empty() && !sys::path::is_absolute(FileName);
  if (NeedsDirectory) {
    SmallString<128> FullPathName = DirName;
    sys::path::append(FullPathName, FileName);
    // Here FullPathName will be copied into StringMap by GetOrCreateSourceID.
    return GetOrCreateSourceID(StringRef(FullPathName), StringRef());
  }

  // A value of zero marks an entry that was just created by the lookup.
  StringMapEntry<unsigned> &Entry = SourceIdMap.GetOrCreateValue(FileName);
  if (unsigned Existing = Entry.getValue())
    return Existing;

  // The new entry is already counted in size(), so the first id is 1.
  const unsigned NewId = SourceIdMap.size();
  Entry.setValue(NewId);

  // Print out a .file directive to specify files for .loc directives.
  OutStreamer.EmitDwarfFileDirective(NewId, "", Entry.getKey());

  return NewId;
}
/// Wrap \p Symbol in a plain (VK_None) symbol-reference expression operand.
/// The machine operand \p MO is not consulted.
MCOperand PTXAsmPrinter::GetSymbolRef(const MachineOperand &MO,
                                      const MCSymbol *Symbol) {
  const MCExpr *SymbolExpr =
    MCSymbolRefExpr::Create(Symbol, MCSymbolRefExpr::VK_None, OutContext);
  return MCOperand::CreateExpr(SymbolExpr);
}
/// Translate one MachineOperand into the equivalent MCOperand.
///
/// Registers are replaced by their encoded form (register number 0 stays 0),
/// basic blocks, globals and external symbols become symbol-reference
/// expressions, and FP immediates are converted to double.  Any other
/// operand kind reaches llvm_unreachable.
MCOperand PTXAsmPrinter::lowerOperand(const MachineOperand &MO) {
  MCOperand Result;
  const PTXMachineFunctionInfo *MFI = MF->getInfo<PTXMachineFunctionInfo>();

  switch (MO.getType()) {
  case MachineOperand::MO_Register: {
    // Encode the register, unless there is none.
    unsigned EncodedReg = 0;
    if (MO.getReg() > 0)
      EncodedReg = MFI->getEncodedRegister(MO.getReg());
    Result = MCOperand::CreateReg(EncodedReg);
    break;
  }
  case MachineOperand::MO_Immediate: {
    Result = MCOperand::CreateImm(MO.getImm());
    break;
  }
  case MachineOperand::MO_MachineBasicBlock: {
    const MCExpr *BlockRef =
      MCSymbolRefExpr::Create(MO.getMBB()->getSymbol(), OutContext);
    Result = MCOperand::CreateExpr(BlockRef);
    break;
  }
  case MachineOperand::MO_GlobalAddress: {
    Result = GetSymbolRef(MO, Mang->getSymbol(MO.getGlobal()));
    break;
  }
  case MachineOperand::MO_ExternalSymbol: {
    Result = GetSymbolRef(MO, GetExternalSymbolSymbol(MO.getSymbolName()));
    break;
  }
  case MachineOperand::MO_FPImmediate: {
    // MC floating-point immediates are doubles, so convert first.
    APFloat Value = MO.getFPImm()->getValueAPF();
    bool LosesInfo;
    Value.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &LosesInfo);
    Result = MCOperand::CreateFPImm(Value.convertToDouble());
    break;
  }
  default:
    llvm_unreachable("Unknown operand type");
  }

  return Result;
}
// Force static initialization.
extern "C" void LLVMInitializePTXAsmPrinter() {
RegisterAsmPrinter<PTXAsmPrinter> X(ThePTX32Target);
RegisterAsmPrinter<PTXAsmPrinter> Y(ThePTX64Target);
}

View File

@ -1,57 +0,0 @@
//===-- PTXAsmPrinter.h - Print machine code to a PTX file ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// PTX Assembly printer class.
//
//===----------------------------------------------------------------------===//
#ifndef PTXASMPRINTER_H
#define PTXASMPRINTER_H
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/Support/Compiler.h"
namespace llvm {
class MCOperand;
/// Assembly printer that writes machine functions out as PTX text.
class LLVM_LIBRARY_VISIBILITY PTXAsmPrinter : public AsmPrinter {
public:
explicit PTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
: AsmPrinter(TM, Streamer) {}
const char *getPassName() const { return "PTX Assembly Printer"; }
/// Runs AsmPrinter::doFinalization() with the module's globals temporarily
/// removed, then restores them.
bool doFinalization(Module &M);
/// Emits the module prologue (.version/.target, .file directives, external
/// function and global variable declarations).
virtual void EmitStartOfAsmFile(Module &M);
/// Emits '{' followed by register, local-param and frame-object
/// declarations.
virtual void EmitFunctionBodyStart();
/// Emits the closing '}'.
virtual void EmitFunctionBodyEnd();
/// Emits the .entry/.func header line of the current function.
virtual void EmitFunctionEntryLabel();
/// Lowers MI to an MCInst and emits it.
virtual void EmitInstruction(const MachineInstr *MI);
/// Returns the id of a source file, emitting a .file directive the first
/// time the file is seen.
unsigned GetOrCreateSourceID(StringRef FileName,
StringRef DirName);
/// Builds a symbol-reference expression operand for Symbol.
MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol);
/// Translates a MachineOperand into the equivalent MCOperand.
MCOperand lowerOperand(const MachineOperand &MO);
private:
void EmitVariableDeclaration(const GlobalVariable *gv);
void EmitFunctionDeclaration(const Function* func);
// File name -> source id, filled in by GetOrCreateSourceID().
StringMap<unsigned> SourceIdMap;
}; // class PTXAsmPrinter
} // namespace llvm
#endif

View File

@ -1,181 +0,0 @@
//===-- PTXFPRoundingModePass.cpp - Assign rounding modes pass ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines a machine function pass that sets appropriate FP rounding
// modes for all relevant instructions.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-fp-rounding-mode"
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// NOTE: PTXFPRoundingModePass should be executed just before emission.
namespace {
/// PTXFPRoundingModePass - Pass to assign appropriate FP rounding modes to
/// all FP instructions. Essentially, this pass just looks for all FP
/// instructions that have a rounding mode set to RndDefault, and sets an
/// appropriate rounding mode based on the target device.
///
class PTXFPRoundingModePass : public MachineFunctionPass {
private:
static char ID;
// Value type of the Instrs map.  initializeMap() stores the rounding mode
// in the second element.
// NOTE(review): the first element is always 1U in the visible entries and
// looks like an operand index -- confirm against processInstruction().
typedef std::pair<unsigned, unsigned> RndModeDesc;
PTXTargetMachine& TargetMachine;
// Opcode -> default rounding-mode descriptor, filled in by initializeMap().
DenseMap<unsigned, RndModeDesc> Instrs;
public:
// OptLevel is accepted but not used by the constructor.
PTXFPRoundingModePass(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
: MachineFunctionPass(ID),
TargetMachine(TM) {
initializeMap();
}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const {
return "PTX FP Rounding Mode Pass";
}
private:
// Populates Instrs with the default rounding mode of each opcode.
void initializeMap();
// Called once for every instruction of the function being processed.
void processInstruction(MachineInstr &MI);
}; // class PTXFPRoundingModePass
} // end anonymous namespace
using namespace llvm;
char PTXFPRoundingModePass::ID = 0;
/// Visit every instruction of every basic block in \p MF, in order, and pass
/// it to processInstruction().
///
/// \returns false unconditionally.
/// NOTE(review): if processInstruction() rewrites instructions, the pass
/// should presumably report a change -- confirm.
bool PTXFPRoundingModePass::runOnMachineFunction(MachineFunction &MF) {
  for (MachineFunction::iterator Block = MF.begin(), BlockEnd = MF.end();
       Block != BlockEnd; ++Block) {
    for (MachineBasicBlock::iterator Inst = Block->begin(),
           InstEnd = Block->end(); Inst != InstEnd; ++Inst)
      processInstruction(*Inst);
  }
  return false;
}
void PTXFPRoundingModePass::initializeMap() {
using namespace PTXRoundingMode;
const PTXSubtarget& ST = TargetMachine.getSubtarget<PTXSubtarget>();
// Build a map of default rounding mode for all instructions that need a
// rounding mode.
Instrs[PTX::FADDrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FADDri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FADDrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FADDri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSUBri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FMULri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FNEGrr32] = std::make_pair(1U, (unsigned)RndNone);
Instrs[PTX::FNEGri32] = std::make_pair(1U, (unsigned)RndNone);
Instrs[PTX::FNEGrr64] = std::make_pair(1U, (unsigned)RndNone);
Instrs[PTX::FNEGri64] = std::make_pair(1U, (unsigned)RndNone);
unsigned FDivRndMode = ST.fdivNeedsRoundingMode() ? RndNearestEven : RndNone;
Instrs[PTX::FDIVrr32] = std::make_pair(1U, FDivRndMode);
Instrs[PTX::FDIVri32] = std::make_pair(1U, FDivRndMode);
Instrs[PTX::FDIVrr64] = std::make_pair(1U, FDivRndMode);
Instrs[PTX::FDIVri64] = std::make_pair(1U, FDivRndMode);
unsigned FMADRndMode = ST.fmadNeedsRoundingMode() ? RndNearestEven : RndNone;
Instrs[PTX::FMADrrr32] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrri32] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrii32] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrrr64] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrri64] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FMADrii64] = std::make_pair(1U, FMADRndMode);
Instrs[PTX::FSQRTrr32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSQRTri32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSQRTrr64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSQRTri64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::FSINrr32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FSINri32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FSINrr64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FSINri64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSrr32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSri32] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSrr64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::FCOSri64] = std::make_pair(1U, (unsigned)RndApprox);
Instrs[PTX::CVTu16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs16f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs16f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs32f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs32f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs64f32] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTu64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTs64f64] = std::make_pair(1U, (unsigned)RndTowardsZeroInt);
Instrs[PTX::CVTf32u16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32s16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32u32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32s32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32u64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32s64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf32f64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64u16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64s16] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64u32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64s32] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64u64] = std::make_pair(1U, (unsigned)RndNearestEven);
Instrs[PTX::CVTf64s64] = std::make_pair(1U, (unsigned)RndNearestEven);
}
/// If \p MI is one of the opcodes recorded in Instrs and its rounding-mode
/// operand still holds RndDefault, overwrite that operand with the recorded
/// default. Every other instruction is left untouched.
void PTXFPRoundingModePass::processInstruction(MachineInstr &MI) {
  DenseMap<unsigned, RndModeDesc>::iterator Entry =
    Instrs.find(MI.getOpcode());

  // Not an instruction that carries a rounding mode.
  if (Entry == Instrs.end())
    return;

  const RndModeDesc &Default = Entry->second;
  MachineOperand &RndOperand = MI.getOperand(Default.first);

  // An explicitly chosen rounding mode is respected.
  if (RndOperand.getImm() != PTXRoundingMode::RndDefault)
    return;

  RndOperand.setImm(Default.second);
}
/// Factory for the FP rounding-mode pass. The caller receives a freshly
/// heap-allocated pass object.
FunctionPass *llvm::createPTXFPRoundingModePass(PTXTargetMachine &TM,
                                                CodeGenOpt::Level OptLevel) {
  PTXFPRoundingModePass *Pass = new PTXFPRoundingModePass(TM, OptLevel);
  return Pass;
}

View File

@ -1,24 +0,0 @@
//===-- PTXFrameLowering.cpp - PTX Frame Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
#include "PTXFrameLowering.h"
#include "llvm/CodeGen/MachineFunction.h"
using namespace llvm;
// Prologue hook for PTX. The body is empty, so no prologue code is inserted
// into MF.
void PTXFrameLowering::emitPrologue(MachineFunction &MF) const {
}
// Epilogue hook for PTX. The body is empty, so no epilogue code is inserted
// into MBB.
void PTXFrameLowering::emitEpilogue(MachineFunction &MF,
MachineBasicBlock &MBB) const {
}

View File

@ -1,44 +0,0 @@
//===-- PTXFrameLowering.h - Define frame lowering for PTX -----*- C++ -*--===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the PTX implementation of the TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_FRAMEINFO_H
#define PTX_FRAMEINFO_H
#include "PTX.h"
#include "PTXSubtarget.h"
#include "llvm/Target/TargetFrameLowering.h"
namespace llvm {
class PTXSubtarget;
/// PTXFrameLowering - Frame lowering hooks for the PTX target. It keeps a
/// reference to the subtarget and reports that functions never have a frame
/// pointer.
class PTXFrameLowering : public TargetFrameLowering {
protected:
// Subtarget this frame lowering was created for; stored but not otherwise
// used in this header.
const PTXSubtarget &STI;
public:
// NOTE(review): the literals 2 and -2 are presumably the stack alignment and
// local-area offset expected by TargetFrameLowering -- confirm against its
// constructor.
explicit PTXFrameLowering(const PTXSubtarget &sti)
: TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 2, -2),
STI(sti) {
}
/// emitPrologue/emitEpilogue - These methods insert prolog and epilog code
/// into the function.
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
// PTX functions never use a frame pointer.
bool hasFP(const MachineFunction &MF) const { return false; }
};
} // End llvm namespace
#endif

View File

@ -1,356 +0,0 @@
//===-- PTXISelDAGToDAG.cpp - A dag to dag inst selector for PTX ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an instruction selector for the PTX target.
//
//===----------------------------------------------------------------------===//
#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXTargetMachine.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
// PTXDAGToDAGISel - PTX specific code to select PTX machine
// instructions for SelectionDAG operations.
class PTXDAGToDAGISel : public SelectionDAGISel {
public:
PTXDAGToDAGISel(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel);
// Human-readable pass name.
virtual const char *getPassName() const {
return "PTX DAG->DAG Pattern Instruction Selection";
}
// Entry point for selecting a single node; a few opcodes are handled by
// hand, everything else goes to the generated SelectCode().
SDNode *Select(SDNode *Node);
// Complex Pattern Selectors.
// Each returns true on a match and fills in its two output operands.
bool SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2);
bool SelectADDRri(SDValue &Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRii(SDValue &Addr, SDValue &Base, SDValue &Offset);
bool SelectADDRlocal(SDValue &Addr, SDValue &Base, SDValue &Offset);
// Include the pieces auto-generated from the target description
#include "PTXGenDAGISel.inc"
private:
// We need this only because we can't match instruction BRAdp
// pattern (PTXbrcond bb:$d, ...) in PTXInstrInfo.td
SDNode *SelectBRCOND(SDNode *Node);
// Hand-written selection for the PTXISD::READ_PARAM / WRITE_PARAM nodes.
SDNode *SelectREADPARAM(SDNode *Node);
SDNode *SelectWRITEPARAM(SDNode *Node);
// Replaces a frame index with the external symbol naming that frame slot.
SDNode *SelectFrameIndex(SDNode *Node);
// True if the operand is a ConstantSDNode.
bool isImm(const SDValue &operand);
// If the operand is a constant, stores the matching target constant in imm.
bool SelectImm(const SDValue &operand, SDValue &imm);
const PTXSubtarget& getSubtarget() const;
}; // class PTXDAGToDAGISel
} // namespace
// createPTXISelDag - Factory for the pass that turns a legalized DAG into a
// PTX-specific DAG, ready for instruction scheduling.
FunctionPass *llvm::createPTXISelDag(PTXTargetMachine &TM,
                                     CodeGenOpt::Level OptLevel) {
  PTXDAGToDAGISel *Selector = new PTXDAGToDAGISel(TM, OptLevel);
  return Selector;
}
// The selector holds no state of its own; both arguments are handed straight
// to the SelectionDAGISel base class.
PTXDAGToDAGISel::PTXDAGToDAGISel(PTXTargetMachine &TM,
                                 CodeGenOpt::Level OptLevel)
  : SelectionDAGISel(TM, OptLevel) {
}
// Dispatch the opcodes that need hand-written selection; anything else is
// matched by the TableGen-generated SelectCode().
SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
  const unsigned Opcode = Node->getOpcode();

  if (Opcode == ISD::BRCOND)
    return SelectBRCOND(Node);
  if (Opcode == PTXISD::READ_PARAM)
    return SelectREADPARAM(Node);
  if (Opcode == PTXISD::WRITE_PARAM)
    return SelectWRITEPARAM(Node);
  if (Opcode == ISD::FrameIndex)
    return SelectFrameIndex(Node);

  return SelectCode(Node);
}
// Select ISD::BRCOND (chain, i1 condition, basic-block target) into a BRAdp
// machine node predicated on the condition with PTXPredicate::Normal.
SDNode *PTXDAGToDAGISel::SelectBRCOND(SDNode *Node) {
  assert(Node->getNumOperands() >= 3);

  SDValue InChain = Node->getOperand(0);
  SDValue Condition = Node->getOperand(1);
  SDValue Dest = Node->getOperand(2); // branch target

  assert(Dest.getOpcode() == ISD::BasicBlock);
  assert(Condition.getValueType() == MVT::i1);

  SDValue PredKind = CurDAG->getTargetConstant(PTXPredicate::Normal,
                                               MVT::i32);

  // Operand order expected by BRAdp: target, predicate, predicate kind, chain.
  SDValue Operands[] = { Dest, Condition, PredKind, InChain };
  return CurDAG->getMachineNode(PTX::BRAdp, Node->getDebugLoc(), MVT::Other,
                                Operands, 4);
}
// Select PTXISD::READ_PARAM (chain, index) into the READPARAM* machine opcode
// that matches the node's result type. The instruction is unpredicated.
SDNode *PTXDAGToDAGISel::SelectREADPARAM(SDNode *Node) {
  SDValue InChain = Node->getOperand(0);
  SDValue ParamIndex = Node->getOperand(1);

  // The opcode is picked from the type of the parameter being read.
  EVT ResultVT = Node->getValueType(0);
  assert(ResultVT.isSimple() && "READ_PARAM only implemented for MVT types");
  MVT Type = ResultVT.getSimpleVT();

  int Opc;
  switch (Type.SimpleTy) {
  case MVT::i1:  Opc = PTX::READPARAMPRED; break;
  case MVT::i16: Opc = PTX::READPARAMI16;  break;
  case MVT::i32: Opc = PTX::READPARAMI32;  break;
  case MVT::i64: Opc = PTX::READPARAMI64;  break;
  case MVT::f32: Opc = PTX::READPARAMF32;  break;
  default:
    // Anything left must be f64.
    assert(Type == MVT::f64 && "Unexpected type!");
    Opc = PTX::READPARAMF64;
    break;
  }

  SDValue NoPred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredKind = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);

  SDValue Operands[] = { ParamIndex, NoPred, PredKind, InChain };
  return CurDAG->getMachineNode(Opc, Node->getDebugLoc(), ResultVT,
                                Operands, 4);
}
// Select PTXISD::WRITE_PARAM (chain, value) into the WRITEPARAM* machine
// opcode that matches the type of the value being written. The instruction is
// unpredicated and produces only a chain.
SDNode *PTXDAGToDAGISel::SelectWRITEPARAM(SDNode *Node) {
  SDValue InChain = Node->getOperand(0);
  SDValue Stored = Node->getOperand(1);

  // The opcode is picked from the type of the parameter being written.
  EVT StoredVT = Stored->getValueType(0);
  assert(StoredVT.isSimple() && "WRITE_PARAM only implemented for MVT types");
  MVT Type = StoredVT.getSimpleVT();

  int Opc;
  switch (Type.SimpleTy) {
  case MVT::i1:  Opc = PTX::WRITEPARAMPRED; break;
  case MVT::i16: Opc = PTX::WRITEPARAMI16;  break;
  case MVT::i32: Opc = PTX::WRITEPARAMI32;  break;
  case MVT::i64: Opc = PTX::WRITEPARAMI64;  break;
  case MVT::f32: Opc = PTX::WRITEPARAMF32;  break;
  case MVT::f64: Opc = PTX::WRITEPARAMF64;  break;
  default:
    llvm_unreachable("Invalid type in SelectWRITEPARAM");
  }

  SDValue NoPred = CurDAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue PredKind = CurDAG->getTargetConstant(PTXPredicate::None, MVT::i32);

  SDValue Operands[] = { Stored, NoPred, PredKind, InChain };
  return CurDAG->getMachineNode(Opc, Node->getDebugLoc(), MVT::Other,
                                Operands, 4);
}
// Replace a FrameIndex node with a target external symbol carrying the name
// that PTXMachineFunctionInfo assigns to that frame slot.
SDNode *PTXDAGToDAGISel::SelectFrameIndex(SDNode *Node) {
  const int SlotIndex = cast<FrameIndexSDNode>(Node)->getIndex();
  PTXMachineFunctionInfo *FuncInfo = MF->getInfo<PTXMachineFunctionInfo>();

  return CurDAG->getTargetExternalSymbol(FuncInfo->getFrameSymbol(SlotIndex),
                                         Node->getValueType(0)).getNode();
}
// Match memory operand of the form [reg+reg].
// On success the whole ADD is returned in R1 and R2 is a zero constant.
bool PTXDAGToDAGISel::SelectADDRrr(SDValue &Addr, SDValue &R1, SDValue &R2) {
  // Only a two-operand ADD can be a [reg+reg] address.
  if (Addr.getOpcode() != ISD::ADD)
    return false;
  if (Addr.getNumOperands() < 2)
    return false;

  // Forms with an immediate on either side belong to the other selectors.
  if (isImm(Addr.getOperand(0)))
    return false;
  if (isImm(Addr.getOperand(1)))
    return false;

  assert(Addr.getValueType().isSimple() && "Type must be simple");

  R1 = Addr;
  R2 = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
  return true;
}
// Match memory operand of the form [reg], [imm+reg], and [reg+imm]
//
// Returns true and fills in Base/Offset on a match. Frame-index based
// addresses are rejected here; they are matched by SelectADDRlocal. A bare
// immediate is also rejected so that SelectADDRii can take it.
//
// Note that Base may already have been overwritten when false is returned for
// a base-plus-constant address whose base is a frame index.
bool PTXDAGToDAGISel::SelectADDRri(SDValue &Addr, SDValue &Base,
                                   SDValue &Offset) {
  // FrameIndex addresses are handled separately
  if (isa<FrameIndexSDNode>(Addr))
    return false;

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    Base = Addr.getOperand(0);
    if (isa<FrameIndexSDNode>(Base))
      return false;

    // isBaseWithConstantOffset() only succeeds when operand 1 is a constant,
    // so use cast<> (which asserts) instead of an unchecked dyn_cast<>.
    ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
    return true;
  }

  // Let SelectADDRii handle the [imm] case.
  if (isImm(Addr))
    return false;

  // Plain [reg]: use the address itself as the base with a zero offset.
  Base = Addr;
  Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
  return true;
}
// Match memory operand of the form [imm+imm] and [imm].
// For [imm] the offset is a zero constant of the address type.
bool PTXDAGToDAGISel::SelectADDRii(SDValue &Addr, SDValue &Base,
                                   SDValue &Offset) {
  // [imm+imm]: both operands of the ADD must be constants.
  if (Addr.getOpcode() == ISD::ADD) {
    if (!SelectImm(Addr.getOperand(0), Base))
      return false;
    return SelectImm(Addr.getOperand(1), Offset);
  }

  // [imm]: the address itself must be a constant.
  if (!SelectImm(Addr, Base))
    return false;

  assert(Addr.getValueType().isSimple() && "Type must be simple");
  Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
  return true;
}
// Match a frame-index based memory operand: either a bare frame index or a
// frame index plus a constant offset.
//
// Returns true and fills in Base/Offset on a match. Every other address form
// is rejected. Note that Base may already have been overwritten when false is
// returned for a base-plus-constant address whose base is not a frame index.
bool PTXDAGToDAGISel::SelectADDRlocal(SDValue &Addr, SDValue &Base,
                                      SDValue &Offset) {
  // Bare frame index: zero offset.
  if (isa<FrameIndexSDNode>(Addr)) {
    Base = Addr;
    Offset = CurDAG->getTargetConstant(0, Addr.getValueType().getSimpleVT());
    return true;
  }

  if (CurDAG->isBaseWithConstantOffset(Addr)) {
    Base = Addr.getOperand(0);
    if (!isa<FrameIndexSDNode>(Base))
      return false;

    // isBaseWithConstantOffset() only succeeds when operand 1 is a constant,
    // so use cast<> (which asserts) instead of an unchecked dyn_cast<>.
    ConstantSDNode *CN = cast<ConstantSDNode>(Addr.getOperand(1));
    Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
    return true;
  }

  return false;
}
bool PTXDAGToDAGISel::isImm(const SDValue &operand) {
return ConstantSDNode::classof(operand.getNode());
}
bool PTXDAGToDAGISel::SelectImm(const SDValue &operand, SDValue &imm) {
SDNode *node = operand.getNode();
if (!ConstantSDNode::classof(node))
return false;
ConstantSDNode *CN = cast<ConstantSDNode>(node);
imm = CurDAG->getTargetConstant(*CN->getConstantIntValue(),
operand.getValueType());
return true;
}
// Convenience accessor for the PTX subtarget of the target machine.
const PTXSubtarget& PTXDAGToDAGISel::getSubtarget() const {
  const PTXSubtarget &Subtarget = TM.getSubtarget<PTXSubtarget>();
  return Subtarget;
}

View File

@ -1,516 +0,0 @@
//===-- PTXISelLowering.cpp - PTX DAG Lowering Implementation -------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTXTargetLowering class.
//
//===----------------------------------------------------------------------===//
#include "PTXISelLowering.h"
#include "PTX.h"
#include "PTXMachineFunctionInfo.h"
#include "PTXRegisterInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Function.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
// Configure how SelectionDAG nodes are lowered for PTX: register classes per
// value type, boolean representation, and which operations are expanded,
// legal, or custom-lowered. computeRegisterProperties() is called last, after
// all register classes have been added.
PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
// Set up the register classes.
addRegisterClass(MVT::i1, &PTX::RegPredRegClass);
addRegisterClass(MVT::i16, &PTX::RegI16RegClass);
addRegisterClass(MVT::i32, &PTX::RegI32RegClass);
addRegisterClass(MVT::i64, &PTX::RegI64RegClass);
addRegisterClass(MVT::f32, &PTX::RegF32RegClass);
addRegisterClass(MVT::f64, &PTX::RegF64RegClass);
setBooleanContents(ZeroOrOneBooleanContent);
setBooleanVectorContents(ZeroOrOneBooleanContent); // FIXME: Is this correct?
setMinFunctionAlignment(2);
// Let LLVM use loads/stores for all mem* operations
// (the store limits are raised to 4096 for memcpy, memmove and memset).
maxStoresPerMemcpy = 4096;
maxStoresPerMemmove = 4096;
maxStoresPerMemset = 4096;
////////////////////////////////////
/////////// Expansion //////////////
////////////////////////////////////
// (any/zero/sign) extload => load + (any/zero/sign) extend
setLoadExtAction(ISD::EXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::ZEXTLOAD, MVT::i16, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
// f32 extload => load + fextend
setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
// f64 truncstore => trunc + store
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
// sign_extend_inreg => sign_extend
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// br_cc => brcond
setOperationAction(ISD::BR_CC, MVT::Other, Expand);
// select_cc => setcc
setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f32, Expand);
setOperationAction(ISD::SELECT_CC, MVT::f64, Expand);
////////////////////////////////////
//////////// Legal /////////////////
////////////////////////////////////
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
////////////////////////////////////
//////////// Custom ////////////////
////////////////////////////////////
// customise setcc to use bitwise logic if possible
// (currently disabled: SETCC on i1 is marked Legal rather than Custom)
//setOperationAction(ISD::SETCC, MVT::i1, Custom);
setOperationAction(ISD::SETCC, MVT::i1, Legal);
// customize translation of memory addresses
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i64, Custom);
// Compute derived properties from the register classes
computeRegisterProperties();
}
// SETCC always produces an i1 on PTX, whatever the type being compared.
EVT PTXTargetLowering::getSetCCResultType(EVT VT) const {
  const EVT ResultVT = MVT::i1;
  return ResultVT;
}
// Dispatch custom lowering by opcode. Only SETCC and GlobalAddress are
// handled; any other opcode is a programming error.
SDValue PTXTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  const unsigned Opcode = Op.getOpcode();

  if (Opcode == ISD::SETCC)
    return LowerSETCC(Op, DAG);
  if (Opcode == ISD::GlobalAddress)
    return LowerGlobalAddress(Op, DAG);

  llvm_unreachable("Unimplemented operand");
}
// Printable name of a PTXISD node, used when dumping DAGs. Passing an opcode
// that is not one of the PTXISD nodes listed below is a programming error.
const char *PTXTargetLowering::getTargetNodeName(unsigned Opcode) const {
  switch (Opcode) {
  case PTXISD::LOAD_PARAM:   return "PTXISD::LOAD_PARAM";
  case PTXISD::STORE_PARAM:  return "PTXISD::STORE_PARAM";
  case PTXISD::READ_PARAM:   return "PTXISD::READ_PARAM";
  case PTXISD::WRITE_PARAM:  return "PTXISD::WRITE_PARAM";
  case PTXISD::EXIT:         return "PTXISD::EXIT";
  case PTXISD::RET:          return "PTXISD::RET";
  case PTXISD::COPY_ADDRESS: return "PTXISD::COPY_ADDRESS";
  case PTXISD::CALL:         return "PTXISD::CALL";
  default:
    llvm_unreachable("Unknown opcode");
  }
}
//===----------------------------------------------------------------------===//
// Custom Lower Operation
//===----------------------------------------------------------------------===//
// Custom lowering hook for SETCC.
//
// No custom lowering is implemented: reaching this function means a setcc was
// not matched by an instruction pattern, which is reported as an internal
// error. The statement that used to follow llvm_unreachable() could never
// execute, so it and the locals that only fed it have been removed.
SDValue PTXTargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const {
  assert(Op.getValueType() == MVT::i1 && "SetCC type must be 1-bit integer");
  llvm_unreachable("setcc was not matched by a pattern!");
}
// Lower a GlobalAddress node to a PTXISD::COPY_ADDRESS of the matching target
// global address, typed as the target's pointer type.
SDValue PTXTargetLowering::
  LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
  EVT PointerVT = getPointerTy();
  DebugLoc Loc = Op.getDebugLoc();
  const GlobalValue *Global = cast<GlobalAddressSDNode>(Op)->getGlobal();

  assert(PointerVT.isSimple() && "Pointer must be to primitive type.");

  SDValue TargetAddr = DAG.getTargetGlobalAddress(Global, Loc, PointerVT);
  return DAG.getNode(PTXISD::COPY_ADDRESS, Loc, PointerVT.getSimpleVT(),
                     TargetAddr);
}
//===----------------------------------------------------------------------===//
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
// Lower the incoming arguments of a function.
//
// The calling convention decides whether the function is recorded as a kernel
// or a device function. Arguments are then materialised in one of two ways:
//   - kernel, or subtarget uses the param space for device arguments:
//     one LOAD_PARAM node per argument, reading a named .param symbol;
//   - otherwise: one READ_PARAM node per argument, plus a virtual register
//     registered with the function info in the Argument register space.
// One value per entry of Ins is appended to InVals; Chain is returned as is.
SDValue PTXTargetLowering::
  LowerFormalArguments(SDValue Chain,
                       CallingConv::ID CallConv,
                       bool isVarArg,
                       const SmallVectorImpl<ISD::InputArg> &Ins,
                       DebugLoc dl,
                       SelectionDAG &DAG,
                       SmallVectorImpl<SDValue> &InVals) const {
  if (isVarArg)
    llvm_unreachable("PTX does not support varargs");

  MachineFunction &Func = DAG.getMachineFunction();
  const PTXSubtarget &Subtarget =
    getTargetMachine().getSubtarget<PTXSubtarget>();
  PTXMachineFunctionInfo *FuncInfo = Func.getInfo<PTXMachineFunctionInfo>();
  PTXParamManager &Params = FuncInfo->getParamManager();

  // Record whether this is a kernel entry point or a device function.
  if (CallConv == CallingConv::PTX_Kernel)
    FuncInfo->setKernel(true);
  else if (CallConv == CallingConv::PTX_Device)
    FuncInfo->setKernel(false);
  else
    llvm_unreachable("Unsupported calling convention");

  const unsigned NumArgs = Ins.size();

  // SM < 2.0 device functions: arguments live in registers.
  if (!FuncInfo->isKernel() && !Subtarget.useParamSpaceForDeviceArgs()) {
    for (unsigned ArgNo = 0; ArgNo != NumArgs; ++ArgNo) {
      EVT ArgVT = Ins[ArgNo].VT;
      const TargetRegisterClass *RegClass = getRegClassFor(ArgVT);

      // Determine which register type we need.
      unsigned Kind;
      if (ArgVT == MVT::i1)
        Kind = PTXRegisterType::Pred;
      else if (ArgVT == MVT::i16)
        Kind = PTXRegisterType::B16;
      else if (ArgVT == MVT::i32)
        Kind = PTXRegisterType::B32;
      else if (ArgVT == MVT::i64)
        Kind = PTXRegisterType::B64;
      else if (ArgVT == MVT::f32)
        Kind = PTXRegisterType::F32;
      else if (ArgVT == MVT::f64)
        Kind = PTXRegisterType::F64;
      else
        llvm_unreachable("Unknown parameter type");

      // Use a unique index in the instruction to prevent instruction folding.
      // Yes, this is a hack.
      SDValue UniqueIndex = DAG.getTargetConstant(ArgNo, MVT::i32);
      unsigned VReg = Func.getRegInfo().createVirtualRegister(RegClass);
      InVals.push_back(DAG.getNode(PTXISD::READ_PARAM, dl, ArgVT, Chain,
                                   UniqueIndex));
      FuncInfo->addRegister(VReg, Kind, PTXRegisterSpace::Argument);
    }
    return Chain;
  }

  // Kernels, and device functions on SM >= 2.0: arguments live in the param
  // space, so we just need to emit the proper LOAD_PARAM nodes.
  for (unsigned ArgNo = 0; ArgNo != NumArgs; ++ArgNo) {
    assert((!FuncInfo->isKernel() || Ins[ArgNo].VT != MVT::i1) &&
           "Kernels cannot take pred operands");

    unsigned SizeInBits = Ins[ArgNo].VT.getStoreSizeInBits();
    unsigned ParamId = Params.addArgumentParam(SizeInBits);
    const std::string &Name = Params.getParamName(ParamId);
    SDValue Symbol = DAG.getTargetExternalSymbol(Name.c_str(), MVT::Other);

    InVals.push_back(DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[ArgNo].VT, Chain,
                                 Symbol));
  }
  return Chain;
}
// Lower a function return.
//
// Kernels must return void and are terminated with PTXISD::EXIT. Device
// functions may return at most one value, which is passed back either
//   - through a return .param symbol via STORE_PARAM, when the subtarget uses
//     the param space for device arguments, or
//   - through a fresh virtual register via CopyToReg + WRITE_PARAM, with the
//     register recorded in the Return register space.
// The function then ends with PTXISD::RET on the resulting chain.
//
// The previous version also carried a glue value that was never assigned, so
// the RET-with-glue branch could not execute; that dead path is removed.
SDValue PTXTargetLowering::
  LowerReturn(SDValue Chain,
              CallingConv::ID CallConv,
              bool isVarArg,
              const SmallVectorImpl<ISD::OutputArg> &Outs,
              const SmallVectorImpl<SDValue> &OutVals,
              DebugLoc dl,
              SelectionDAG &DAG) const {
  if (isVarArg) llvm_unreachable("PTX does not support varargs");

  switch (CallConv) {
    default:
      llvm_unreachable("Unsupported calling convention.");
    case CallingConv::PTX_Kernel:
      assert(Outs.size() == 0 && "Kernel must return void.");
      return DAG.getNode(PTXISD::EXIT, dl, MVT::Other, Chain);
    case CallingConv::PTX_Device:
      assert(Outs.size() <= 1 && "Can at most return one value.");
      break;
  }

  MachineFunction& MF = DAG.getMachineFunction();
  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
  PTXParamManager &PM = MFI->getParamManager();

  const PTXSubtarget& ST = getTargetMachine().getSubtarget<PTXSubtarget>();

  if (ST.useParamSpaceForDeviceArgs()) {
    assert(Outs.size() < 2 && "Device functions can return at most one value");

    if (Outs.size() == 1) {
      // Store the value into a dedicated return parameter.
      unsigned ParamSize = OutVals[0].getValueType().getSizeInBits();
      unsigned Param = PM.addReturnParam(ParamSize);
      const std::string &ParamName = PM.getParamName(Param);
      SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
                                                       MVT::Other);
      Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
                          ParamValue, OutVals[0]);
    }
  } else {
    for (unsigned i = 0, e = Outs.size(); i != e; ++i) {
      EVT RegVT = Outs[i].VT;
      const TargetRegisterClass* TRC;
      unsigned RegType;

      // Determine which register class we need
      if (RegVT == MVT::i1) {
        TRC = &PTX::RegPredRegClass;
        RegType = PTXRegisterType::Pred;
      } else if (RegVT == MVT::i16) {
        TRC = &PTX::RegI16RegClass;
        RegType = PTXRegisterType::B16;
      } else if (RegVT == MVT::i32) {
        TRC = &PTX::RegI32RegClass;
        RegType = PTXRegisterType::B32;
      } else if (RegVT == MVT::i64) {
        TRC = &PTX::RegI64RegClass;
        RegType = PTXRegisterType::B64;
      } else if (RegVT == MVT::f32) {
        TRC = &PTX::RegF32RegClass;
        RegType = PTXRegisterType::F32;
      } else if (RegVT == MVT::f64) {
        TRC = &PTX::RegF64RegClass;
        RegType = PTXRegisterType::F64;
      } else {
        llvm_unreachable("Unknown parameter type");
      }

      // Copy the value into a new virtual register and mark that register
      // as a return register.
      unsigned Reg = MF.getRegInfo().createVirtualRegister(TRC);
      SDValue Copy = DAG.getCopyToReg(Chain, dl, Reg, OutVals[i]);
      SDValue OutReg = DAG.getRegister(Reg, RegVT);
      Chain = DAG.getNode(PTXISD::WRITE_PARAM, dl, MVT::Other, Copy, OutReg);
      MFI->addRegister(Reg, RegType, PTXRegisterSpace::Return);
    }
  }

  return DAG.getNode(PTXISD::RET, dl, MVT::Other, Chain);
}
// Lower a call to a PTX device function (or to printf/puts, which get a
// dedicated argument-marshalling path).
//
// A single PTXISD::CALL node is built whose operands are laid out as
//   [Chain, #Ins, Ins..., Callee, #Outs, Outs...]
// where Ins are .param symbols that will receive the return value(s) and Outs
// are .param symbols that hold the outgoing arguments. Arguments are written
// with STORE_PARAM before the call and results are read back with LOAD_PARAM
// afterwards; one loaded value per entry of Ins is appended to InVals.
//
// The CallConv, isVarArg, doesNotRet and isTailCall parameters are not
// consulted in this body.
SDValue
PTXTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const {
MachineFunction& MF = DAG.getMachineFunction();
PTXMachineFunctionInfo *PTXMFI = MF.getInfo<PTXMachineFunctionInfo>();
PTXParamManager &PM = PTXMFI->getParamManager();
MachineFrameInfo *MFI = MF.getFrameInfo();
assert(getTargetMachine().getSubtarget<PTXSubtarget>().callsAreHandled() &&
"Calls are not handled for the target device");
// Identify the callee function
// (cast<> is used, so the callee must be a GlobalAddressSDNode that refers to
// a Function)
const GlobalValue *GV = cast<GlobalAddressSDNode>(Callee)->getGlobal();
const Function *function = cast<Function>(GV);
// allow non-device calls only for printf
// (the check below also accepts "puts")
bool isPrintf = function->getName() == "printf" || function->getName() == "puts";
assert((isPrintf || function->getCallingConv() == CallingConv::PTX_Device) &&
"PTX function calls must be to PTX device functions");
// printf-style calls always pass exactly two parameters: the format string
// address and the address of the packed argument buffer.
unsigned outSize = isPrintf ? 2 : Outs.size();
std::vector<SDValue> Ops;
// The layout of the ops will be [Chain, #Ins, Ins, Callee, #Outs, Outs]
Ops.resize(outSize + Ins.size() + 4);
Ops[0] = Chain;
// Identify the callee function
Callee = DAG.getTargetGlobalAddress(GV, dl, getPointerTy());
Ops[Ins.size()+2] = Callee;
// #Outs
Ops[Ins.size()+3] = DAG.getTargetConstant(outSize, MVT::i32);
if (isPrintf) {
// first argument is the address of the global string variable in memory
unsigned Param0 = PM.addLocalParam(getPointerTy().getSizeInBits());
SDValue ParamValue0 = DAG.getTargetExternalSymbol(PM.getParamName(Param0).c_str(),
MVT::Other);
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
ParamValue0, OutVals[0]);
Ops[Ins.size()+4] = ParamValue0;
// alignment is the maximum size of all the arguments
// (in bits; the format string in OutVals[0] is skipped)
unsigned alignment = 0;
for (unsigned i = 1; i < OutVals.size(); ++i) {
alignment = std::max(alignment,
OutVals[i].getValueType().getSizeInBits());
}
// size is the alignment multiplied by the number of arguments
unsigned size = alignment * (OutVals.size() - 1);
// second argument is the address of the stack object (unless no arguments)
unsigned Param1 = PM.addLocalParam(getPointerTy().getSizeInBits());
SDValue ParamValue1 = DAG.getTargetExternalSymbol(PM.getParamName(Param1).c_str(),
MVT::Other);
Ops[Ins.size()+5] = ParamValue1;
if (size > 0)
{
// create a local stack object to store the arguments
// (size and alignment are converted from bits to bytes here)
unsigned StackObject = MFI->CreateStackObject(size / 8, alignment / 8, false);
SDValue FrameIndex = DAG.getFrameIndex(StackObject, getPointerTy());
// store each of the arguments to the stack in turn
// NOTE(review): the slot stride is hard-coded to 8 bytes while the object
// size above is derived from `alignment`; if the widest argument is narrower
// than 64 bits the stores would presumably extend past the object -- confirm.
for (unsigned int i = 1; i != OutVals.size(); i++) {
SDValue FrameAddr = DAG.getNode(ISD::ADD, dl, getPointerTy(), FrameIndex, DAG.getTargetConstant((i - 1) * 8, getPointerTy()));
Chain = DAG.getStore(Chain, dl, OutVals[i], FrameAddr,
MachinePointerInfo(),
false, false, 0);
}
// copy the address of the local frame index to get the address in non-local space
SDValue genericAddr = DAG.getNode(PTXISD::COPY_ADDRESS, dl, getPointerTy(), FrameIndex);
// store this address in the second argument
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain, ParamValue1, genericAddr);
}
}
else
{
// Generate STORE_PARAM nodes for each function argument. In PTX, function
// arguments are explicitly stored into .param variables and passed as
// arguments. There is no register/stack-based calling convention in PTX.
for (unsigned i = 0; i != OutVals.size(); ++i) {
unsigned Size = OutVals[i].getValueType().getSizeInBits();
unsigned Param = PM.addLocalParam(Size);
const std::string &ParamName = PM.getParamName(Param);
SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
MVT::Other);
Chain = DAG.getNode(PTXISD::STORE_PARAM, dl, MVT::Other, Chain,
ParamValue, OutVals[i]);
Ops[i+Ins.size()+4] = ParamValue;
}
}
std::vector<SDValue> InParams;
// Generate list of .param variables to hold the return value(s).
Ops[1] = DAG.getTargetConstant(Ins.size(), MVT::i32);
for (unsigned i = 0; i < Ins.size(); ++i) {
unsigned Size = Ins[i].VT.getStoreSizeInBits();
unsigned Param = PM.addLocalParam(Size);
const std::string &ParamName = PM.getParamName(Param);
SDValue ParamValue = DAG.getTargetExternalSymbol(ParamName.c_str(),
MVT::Other);
Ops[i+2] = ParamValue;
InParams.push_back(ParamValue);
}
// Refresh the chain operand: it now includes the STORE_PARAM/store nodes
// created above.
Ops[0] = Chain;
// Create the CALL node.
Chain = DAG.getNode(PTXISD::CALL, dl, MVT::Other, &Ops[0], Ops.size());
// Create the LOAD_PARAM nodes that retrieve the function return value(s).
for (unsigned i = 0; i < Ins.size(); ++i) {
SDValue Load = DAG.getNode(PTXISD::LOAD_PARAM, dl, Ins[i].VT, Chain,
InParams[i]);
InVals.push_back(Load);
}
return Chain;
}
// Reports how many registers a value of type VT occupies.
// Both Context and VT are ignored; the answer is always 1.
unsigned PTXTargetLowering::getNumRegisters(LLVMContext &Context, EVT VT) {
// All arguments consist of one "register," regardless of the type.
return 1;
}

View File

@ -1,82 +0,0 @@
//===-- PTXISelLowering.h - PTX DAG Lowering Interface ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that PTX uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_ISEL_LOWERING_H
#define PTX_ISEL_LOWERING_H
#include "llvm/Target/TargetLowering.h"
namespace llvm {
// PTX-specific SelectionDAG node opcodes. Numbering begins at
// ISD::BUILTIN_OP_END, i.e. directly after the target-independent opcodes.
namespace PTXISD {
enum NodeType {
FIRST_NUMBER = ISD::BUILTIN_OP_END,
LOAD_PARAM,   // read a value out of a .param variable
STORE_PARAM,  // write a value into a .param variable
READ_PARAM,
WRITE_PARAM,
EXIT,
RET,
COPY_ADDRESS, // copy a frame-index address into a pointer value
CALL          // function call taking .param symbols as operands
};
} // namespace PTXISD
// TargetLowering implementation for the PTX back-end: declares the hooks used
// to lower formal arguments, returns, calls and custom operations into
// SelectionDAG nodes (including the PTXISD opcodes).
class PTXTargetLowering : public TargetLowering {
public:
explicit PTXTargetLowering(TargetMachine &TM);
// Returns a printable name for a target-specific DAG opcode.
virtual const char *getTargetNodeName(unsigned Opcode) const;
// Entry point for custom lowering of an operation.
virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const;
virtual SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG) const;
// Lowers the incoming arguments described by Ins; the resulting values are
// appended to InVals and the updated chain is returned.
virtual SDValue
LowerFormalArguments(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl,
SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
// Lowers a function return of the values in OutVals.
virtual SDValue
LowerReturn(SDValue Chain,
CallingConv::ID CallConv,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
DebugLoc dl,
SelectionDAG &DAG) const;
// Lowers an outgoing call: OutVals are the argument values, Ins describes
// the expected results, which are appended to InVals.
virtual SDValue
LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
bool isVarArg, bool doesNotRet, bool &isTailCall,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
DebugLoc dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const;
virtual EVT getSetCCResultType(EVT VT) const;
// NOTE(review): unlike the other hooks this one is not const-qualified;
// confirm it matches the base-class signature it is intended to replace.
virtual unsigned getNumRegisters(LLVMContext &Context, EVT VT);
private:
// Helper for lowering global-address nodes.
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
}; // class PTXTargetLowering
} // namespace llvm
#endif // PTX_ISEL_LOWERING_H

View File

@ -1,51 +0,0 @@
//===-- PTXInstrFormats.td - PTX Instruction Formats -------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
// Rounding Mode Specifier
/*class RoundingMode<bits<3> val> {
bits<3> Value = val;
}
def RndDefault : RoundingMode<0>;
def RndNearestEven : RoundingMode<1>;
def RndNearestZero : RoundingMode<2>;
def RndNegInf : RoundingMode<3>;
def RndPosInf : RoundingMode<4>;
def RndApprox : RoundingMode<5>;*/
// Rounding Mode Operand
// i32 operand printed through the "printRoundingMode" method.
def RndMode : Operand<i32> {
let PrintMethod = "printRoundingMode";
}
// Pattern leaf matching the i32 constant 0 (named as the default rounding
// mode).
def RndDefault : PatLeaf<(i32 0)>;
// PTX predicate operand: a (RegPred register, i32 immediate) pair whose
// default value is (zero_reg, 2).
// NOTE(review): the immediate 2 presumably encodes PTXPredicate::None --
// confirm against the PTXPredicate enum, which is not visible here.
// Leave PrintMethod empty; predicate printing is defined elsewhere.
def pred : PredicateOperand<OtherVT, (ops RegPred, i32imm),
(ops (i1 zero_reg), (i32 2))>;
// Operand of type OtherVT made up of a single i32 immediate.
def RndModeOperand : Operand<OtherVT> {
let MIOperandInfo = (ops i32imm);
}
// Instruction Types
// Base class for every PTX instruction. The caller-supplied input operands
// are extended with a trailing predicate operand (pred:$_p), and the
// instruction is marked predicable.
let Namespace = "PTX" in {
class InstPTX<dag oops, dag iops, string asmstr, list<dag> pattern>
: Instruction {
dag OutOperandList = oops;
// Append the predicate operand after the explicit inputs.
dag InOperandList = !con(iops, (ins pred:$_p));
let AsmString = asmstr; // Predicate printing is defined elsewhere.
let Pattern = pattern;
let isPredicable = 1;
}
}

View File

@ -1,359 +0,0 @@
//===-- PTXInstrInfo.cpp - PTX Instruction Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-instrinfo"
#include "PTXInstrInfo.h"
#include "PTX.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#define GET_INSTRINFO_CTOR
#include "PTXGenInstrInfo.inc"
using namespace llvm;
// Builds the instruction info for the given target machine. The register
// info member RI is constructed from the target machine and this object, and
// a reference to the target machine is retained in TM.
PTXInstrInfo::PTXInstrInfo(PTXTargetMachine &_TM)
: PTXGenInstrInfo(),
RI(_TM, *this), TM(_TM) {}
// Lookup table pairing each PTX register class with the register-to-register
// move opcode used to copy a value of that class. Searched linearly by
// copyPhysReg and copyRegToReg.
static const struct map_entry {
const TargetRegisterClass *cls; // register class being copied
const int opcode;               // move opcode for that class
} map[] = {
{ &PTX::RegI16RegClass, PTX::MOVU16rr },
{ &PTX::RegI32RegClass, PTX::MOVU32rr },
{ &PTX::RegI64RegClass, PTX::MOVU64rr },
{ &PTX::RegF32RegClass, PTX::MOVF32rr },
{ &PTX::RegF64RegClass, PTX::MOVF64rr },
{ &PTX::RegPredRegClass, PTX::MOVPREDrr }
};
// Emits a register-to-register move from SrcReg to DstReg before I. The move
// opcode is picked by looking up the register class of DstReg in the `map`
// table; the source register class is not checked. Aborts through
// llvm_unreachable when DstReg's class has no table entry.
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator I, DebugLoc DL,
                               unsigned DstReg, unsigned SrcReg,
                               bool KillSrc) const {
  const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
  const unsigned NumEntries = sizeof(map) / sizeof(map[0]);

  for (unsigned Idx = 0; Idx < NumEntries; ++Idx) {
    // Skip table rows that describe a different register class.
    if (map[Idx].cls != MRI.getRegClass(DstReg))
      continue;

    MachineInstr *Copy =
        BuildMI(MBB, I, DL, get(map[Idx].opcode), DstReg)
            .addReg(SrcReg, getKillRegState(KillSrc));
    // Every PTX instruction carries predicate operands; add the defaults.
    AddDefaultPredicate(Copy);
    return;
  }

  llvm_unreachable("Impossible reg-to-reg copy");
}
// Emits a move from SrcReg to DstReg before I when both registers belong to
// the same register class and that class appears in the `map` table.
// Returns true if a move was emitted, false otherwise.
bool PTXInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator I,
                                unsigned DstReg, unsigned SrcReg,
                                const TargetRegisterClass *DstRC,
                                const TargetRegisterClass *SrcRC,
                                DebugLoc DL) const {
  // Cross-class copies are not supported.
  if (DstRC != SrcRC)
    return false;

  const unsigned NumEntries = sizeof(map) / sizeof(map[0]);
  for (unsigned Idx = 0; Idx < NumEntries; ++Idx) {
    if (map[Idx].cls != DstRC)
      continue;

    MachineInstr *Copy =
        BuildMI(MBB, I, DL, get(map[Idx].opcode), DstReg).addReg(SrcReg);
    AddDefaultPredicate(Copy);
    return true;
  }

  return false;
}
// Recognizes the register-to-register move opcodes. On a match, DstReg and
// SrcReg receive operands 0 and 1 respectively, both sub-register indices are
// set to 0, and true is returned. Any other opcode yields false and leaves
// the output parameters untouched.
bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
                               unsigned &SrcReg, unsigned &DstReg,
                               unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
  switch (MI.getOpcode()) {
  case PTX::MOVU16rr:
  case PTX::MOVU32rr:
  case PTX::MOVU64rr:
  case PTX::MOVF32rr:
  case PTX::MOVF64rr:
  case PTX::MOVPREDrr:
    break;
  default:
    return false;
  }

  assert(MI.getNumOperands() >= 2 &&
         MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
         "Invalid register-register move instruction");

  // PTX moves never involve sub-registers.
  DstSubIdx = 0;
  SrcSubIdx = 0;
  DstReg = MI.getOperand(0).getReg();
  SrcReg = MI.getOperand(1).getReg();
  return true;
}
// predicate support
// An instruction counts as predicated when it has a predicate operand whose
// register is something other than PTX::NoRegister.
bool PTXInstrInfo::isPredicated(const MachineInstr *MI) const {
  const int PredIdx = MI->findFirstPredOperandIdx();
  if (PredIdx == -1)
    return false;
  return MI->getOperand(PredIdx).getReg() != PTX::NoRegister;
}
// True for terminator instructions that are not guarded by a predicate.
bool PTXInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
  if (isPredicated(MI))
    return false;
  return MI->isTerminator();
}
// Rewrites MI's predicate operands in place. Pred[0] supplies the predicate
// register and Pred[1] the predicate immediate. Aborts when fewer than two
// predicate operands are given or MI has no predicate operand; otherwise
// always returns true.
bool PTXInstrInfo::
PredicateInstruction(MachineInstr *MI,
                     const SmallVectorImpl<MachineOperand> &Pred) const {
  if (Pred.size() < 2)
    llvm_unreachable("lesser than 2 predicate operands are provided");

  const int PredIdx = MI->findFirstPredOperandIdx();
  if (PredIdx == -1)
    llvm_unreachable("missing predicate operand");

  // The predicate register is immediately followed by its immediate.
  MI->getOperand(PredIdx).setReg(Pred[0].getReg());
  MI->getOperand(PredIdx + 1).setImm(Pred[1].getImm());
  return true;
}
bool PTXInstrInfo::
SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const {
const MachineOperand &PredReg1 = Pred1[0];
const MachineOperand &PredReg2 = Pred2[0];
if (PredReg1.getReg() != PredReg2.getReg())
return false;
const MachineOperand &PredOp1 = Pred1[1];
const MachineOperand &PredOp2 = Pred2[1];
if (PredOp1.getImm() != PredOp2.getImm())
return false;
return true;
}
// Reports whether MI defines a predicate. When operand 0 is a register whose
// class is RegPred, that operand plus a PTXPredicate::None immediate are
// appended to Pred and true is returned; otherwise Pred is left unchanged and
// false is returned.
bool PTXInstrInfo::
DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const {
// If an instruction sets a predicate register, it defines a predicate.
// TODO support 5-operand format of setp instruction
if (MI->getNumOperands() < 1)
return false;
const MachineOperand &MO = MI->getOperand(0);
// NOTE(review): RI.getRegClass is called with a register number here; confirm
// that this overload maps a register to its class rather than taking a
// register-class index.
if (!MO.isReg() || RI.getRegClass(MO.getReg()) != &PTX::RegPredRegClass)
return false;
Pred.push_back(MO);
Pred.push_back(MachineOperand::CreateImm(PTXPredicate::None));
return true;
}
// branch support
// Inspects the last one or two instructions of MBB and describes how the
// block terminates.
//
// Returns false when the terminator sequence was understood and true when it
// was not (an empty block also yields true). On a false return:
//   - no branch at the end:        TBB, FBB and Cond are left untouched;
//   - lone unconditional branch:   TBB is its target;
//   - conditional branch with a layout successor among the successors:
//                                  TBB is its target, Cond holds its two
//                                  predicate operands;
//   - conditional followed by unconditional branch:
//                                  TBB/FBB are the respective targets and
//                                  Cond holds the conditional's predicate
//                                  operands.
// AllowModify is currently ignored.
bool PTXInstrInfo::
AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify) const {
// TODO implement cases when AllowModify is true
if (MBB.empty())
return true;
MachineBasicBlock::iterator iter = MBB.end();
const MachineInstr& instLast1 = *--iter;
// for special case that MBB has only 1 instruction
const bool IsSizeOne = MBB.size() == 1;
// if IsSizeOne is true, *--iter would be invalid, so instLast2 is bound to
// instLast1 as a dummy value; it is still referenced below
const MachineInstr& instLast2 = IsSizeOne ? instLast1 : *--iter;
DEBUG(dbgs() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: opcode: " << instLast1.getOpcode() << "\n");
DEBUG(dbgs() << "AnalyzeBranch: MBB: " << MBB.getName().str() << "\n");
// These print the pointer values passed in by the caller.
DEBUG(dbgs() << "AnalyzeBranch: TBB: " << TBB << "\n");
DEBUG(dbgs() << "AnalyzeBranch: FBB: " << FBB << "\n");
// this block ends with no branches
if (!IsAnyKindOfBranch(instLast1)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with no branch\n");
return false;
}
// this block ends with only an unconditional branch
if (instLast1.isUnconditionalBranch() &&
// when IsSizeOne is true, it "absorbs" the evaluation of instLast2
(IsSizeOne || !IsAnyKindOfBranch(instLast2))) {
DEBUG(dbgs() << "AnalyzeBranch: ends with only uncond branch\n");
TBB = GetBranchTarget(instLast1);
return false;
}
// this block ends with a conditional branch and
// it falls through to a successor block
if (instLast1.isConditionalBranch() &&
IsAnySuccessorAlsoLayoutSuccessor(MBB)) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond branch and fall through\n");
TBB = GetBranchTarget(instLast1);
// The condition is the (predicate register, predicate immediate) pair.
int i = instLast1.findFirstPredOperandIdx();
Cond.push_back(instLast1.getOperand(i));
Cond.push_back(instLast1.getOperand(i+1));
return false;
}
// when IsSizeOne is true, we are done
if (IsSizeOne)
return true;
// this block ends with a conditional branch
// followed by an unconditional branch
if (instLast2.isConditionalBranch() &&
instLast1.isUnconditionalBranch()) {
DEBUG(dbgs() << "AnalyzeBranch: ends with cond and uncond branch\n");
TBB = GetBranchTarget(instLast2);
FBB = GetBranchTarget(instLast1);
int i = instLast2.findFirstPredOperandIdx();
Cond.push_back(instLast2.getOperand(i));
Cond.push_back(instLast2.getOperand(i+1));
return false;
}
// branch cannot be understood
DEBUG(dbgs() << "AnalyzeBranch: cannot be understood\n");
return true;
}
// Strips trailing instructions from MBB for as long as the last instruction
// satisfies IsAnyKindOfBranch, and returns how many were removed.
unsigned PTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
  unsigned NumRemoved = 0;
  for (; !MBB.empty() && IsAnyKindOfBranch(MBB.back()); ++NumRemoved)
    MBB.pop_back();

  DEBUG(dbgs() << "RemoveBranch: MBB: " << MBB.getName().str() << "\n");
  DEBUG(dbgs() << "RemoveBranch: remove " << NumRemoved << " branch inst\n");
  return NumRemoved;
}
// Appends branch instructions to the end of MBB and returns how many were
// added (1 or 2).
//   - FBB non-null: a predicated branch (BRAdp) to TBB using Cond, followed
//     by an unpredicated branch (BRAd) to FBB.
//   - FBB null, Cond non-empty: a single predicated branch to TBB.
//   - FBB null, Cond empty: a single unpredicated branch to TBB.
// TBB must be non-null. When FBB is non-null, Cond is indexed without a size
// check, so it must hold the two predicate operands.
unsigned PTXInstrInfo::
InsertBranch(MachineBasicBlock &MBB,
MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const {
DEBUG(dbgs() << "InsertBranch: MBB: " << MBB.getName().str() << "\n");
DEBUG(if (TBB) dbgs() << "InsertBranch: TBB: " << TBB->getName().str()
<< "\n";
else dbgs() << "InsertBranch: TBB: (NULL)\n");
DEBUG(if (FBB) dbgs() << "InsertBranch: FBB: " << FBB->getName().str()
<< "\n";
else dbgs() << "InsertBranch: FBB: (NULL)\n");
DEBUG(dbgs() << "InsertBranch: Cond size: " << Cond.size() << "\n");
assert(TBB && "TBB is NULL");
if (FBB) {
// Two-way branch: conditional to TBB, then unconditional to FBB.
BuildMI(&MBB, DL, get(PTX::BRAdp))
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
BuildMI(&MBB, DL, get(PTX::BRAd))
.addMBB(FBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 2;
} else if (Cond.size()) {
// One-way conditional branch.
BuildMI(&MBB, DL, get(PTX::BRAdp))
.addMBB(TBB).addReg(Cond[0].getReg()).addImm(Cond[1].getImm());
return 1;
} else {
// Unconditional branch, emitted with the default (no) predicate.
BuildMI(&MBB, DL, get(PTX::BRAd))
.addMBB(TBB).addReg(PTX::NoRegister).addImm(PTXPredicate::None);
return 1;
}
}
// Memory operand folding for spills
// Spilling a register to a stack slot is not supported by this back-end;
// reaching this function aborts via llvm_unreachable. All parameters are
// unused.
void PTXInstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MII,
unsigned SrcReg, bool isKill, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
llvm_unreachable("storeRegToStackSlot should not be called for PTX");
}
// Reloading a register from a stack slot is not supported by this back-end;
// reaching this function aborts via llvm_unreachable. All parameters are
// unused.
void PTXInstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MII,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const {
llvm_unreachable("loadRegFromStackSlot should not be called for PTX");
}
// static helper routines
// Creates a machine node for Opcode with a single explicit operand, followed
// by the default predicate pair (PTX::NoRegister, PTXPredicate::None).
MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
                  DebugLoc dl, EVT VT, SDValue Op1) {
  // Default predicate: no register, no predicate modifier.
  SDValue NoPredReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue NoPredImm = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);

  SDValue Operands[] = { Op1, NoPredReg, NoPredImm };
  return DAG->getMachineNode(Opcode, dl, VT, Operands,
                             array_lengthof(Operands));
}
// Creates a machine node for Opcode with two explicit operands, followed by
// the default predicate pair (PTX::NoRegister, PTXPredicate::None).
MachineSDNode *PTXInstrInfo::
GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
                  DebugLoc dl, EVT VT, SDValue Op1, SDValue Op2) {
  // Default predicate: no register, no predicate modifier.
  SDValue NoPredReg = DAG->getRegister(PTX::NoRegister, MVT::i1);
  SDValue NoPredImm = DAG->getTargetConstant(PTXPredicate::None, MVT::i32);

  SDValue Operands[] = { Op1, Op2, NoPredReg, NoPredImm };
  return DAG->getMachineNode(Opcode, dl, VT, Operands,
                             array_lengthof(Operands));
}
// Appends the default predicate pair (PTX::NoRegister, PTXPredicate::None)
// to MI unless it already has a predicate operand.
void PTXInstrInfo::AddDefaultPredicate(MachineInstr *MI) {
  // Nothing to do when a predicate operand is already present.
  if (MI->findFirstPredOperandIdx() != -1)
    return;

  MI->addOperand(MachineOperand::CreateReg(PTX::NoRegister, /*IsDef=*/false));
  MI->addOperand(MachineOperand::CreateImm(PTXPredicate::None));
}
// True when the instruction is a terminator, a branch, or an indirect branch.
// Note that any terminator qualifies, not only branches.
bool PTXInstrInfo::IsAnyKindOfBranch(const MachineInstr& inst) {
  if (inst.isTerminator())
    return true;
  if (inst.isBranch())
    return true;
  return inst.isIndirectBranch();
}
// Returns true when at least one CFG successor of MBB is also the block that
// immediately follows it in layout order, i.e. MBB can fall through to one of
// its successors.
bool PTXInstrInfo::
IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB) {
  for (MachineBasicBlock::const_succ_iterator
         i = MBB.succ_begin(), e = MBB.succ_end(); i != e; ++i)
    // The successor iterator yields MachineBasicBlock pointers, so *i is the
    // successor itself. Taking &*i and casting it would pass the address of
    // the list slot instead of the block, and the check could never succeed.
    if (MBB.isLayoutSuccessor(*i))
      return true;
  return false;
}
// Returns the basic block a branch instruction jumps to, read from operand 0.
// Asserts that operand 0 is a basic-block operand.
MachineBasicBlock *PTXInstrInfo::GetBranchTarget(const MachineInstr& inst) {
// FIXME So far all branch instructions put destination in 1st operand
const MachineOperand& target = inst.getOperand(0);
assert(target.isMBB() && "FIXME: detect branch target operand");
return target.getMBB();
}

View File

@ -1,133 +0,0 @@
//===-- PTXInstrInfo.h - PTX Instruction Information ------------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of the TargetInstrInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_INSTR_INFO_H
#define PTX_INSTR_INFO_H
#include "PTXRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#define GET_INSTRINFO_HEADER
#include "PTXGenInstrInfo.inc"
namespace llvm {
class PTXTargetMachine;
class MachineSDNode;
class SDValue;
class SelectionDAG;
// Instruction information for the PTX back-end: register copies, predicate
// handling, branch analysis and insertion, plus a few static helpers for
// building predicated machine nodes.
class PTXInstrInfo : public PTXGenInstrInfo {
private:
const PTXRegisterInfo RI; // register info owned by this object
PTXTargetMachine &TM;     // target machine this object was created for
public:
explicit PTXInstrInfo(PTXTargetMachine &_TM);
virtual const PTXRegisterInfo &getRegisterInfo() const { return RI; }
// Emits a register-to-register move before I.
virtual void copyPhysReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I, DebugLoc DL,
unsigned DstReg, unsigned SrcReg,
bool KillSrc) const;
// Emits a move when both register classes match; returns whether it did.
virtual bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DstReg, unsigned SrcReg,
const TargetRegisterClass *DstRC,
const TargetRegisterClass *SrcRC,
DebugLoc DL) const;
// Identifies register-to-register moves and extracts their registers.
virtual bool isMoveInstr(const MachineInstr& MI,
unsigned &SrcReg, unsigned &DstReg,
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
// predicate support
virtual bool isPredicated(const MachineInstr *MI) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual
bool PredicateInstruction(MachineInstr *MI,
const SmallVectorImpl<MachineOperand> &Pred) const;
virtual
bool SubsumesPredicate(const SmallVectorImpl<MachineOperand> &Pred1,
const SmallVectorImpl<MachineOperand> &Pred2) const;
virtual bool DefinesPredicate(MachineInstr *MI,
std::vector<MachineOperand> &Pred) const;
// PTX is fully-predicable
virtual bool isPredicable(MachineInstr *MI) const { return true; }
// branch support
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
MachineBasicBlock *&FBB,
SmallVectorImpl<MachineOperand> &Cond,
bool AllowModify = false) const;
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond,
DebugLoc DL) const;
// Memory operand folding for spills
// TODO: Implement this eventually and get rid of storeRegToStackSlot and
// loadRegFromStackSlot. Doing so will get rid of the "stack" registers
// we currently use to spill, though I doubt the overall effect on ptxas
// output will be large. I have yet to see a case where ptxas is unable
// to see through the "stack" register usage and hence generates
// efficient code anyway.
// virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
// MachineInstr* MI,
// const SmallVectorImpl<unsigned> &Ops,
// int FrameIndex) const;
virtual void storeRegToStackSlot(MachineBasicBlock& MBB,
MachineBasicBlock::iterator MII,
unsigned SrcReg, bool isKill, int FrameIndex,
const TargetRegisterClass* RC,
const TargetRegisterInfo* TRI) const;
virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MII,
unsigned DestReg, int FrameIdx,
const TargetRegisterClass *RC,
const TargetRegisterInfo *TRI) const;
// static helper routines
// Build a machine node with one or two explicit operands.
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT,
SDValue Op1);
static MachineSDNode *GetPTXMachineNode(SelectionDAG *DAG, unsigned Opcode,
DebugLoc dl, EVT VT,
SDValue Op1, SDValue Op2);
// Appends default predicate operands to MI.
static void AddDefaultPredicate(MachineInstr *MI);
static bool IsAnyKindOfBranch(const MachineInstr& inst);
static bool IsAnySuccessorAlsoLayoutSuccessor(const MachineBasicBlock& MBB);
// Returns the basic block targeted by a branch instruction.
static MachineBasicBlock *GetBranchTarget(const MachineInstr& inst);
}; // class PTXInstrInfo
} // namespace llvm
#endif // PTX_INSTR_INFO_H

File diff suppressed because it is too large Load Diff

View File

@ -1,278 +0,0 @@
//===- PTXInstrLoadStore.td - PTX Load/Store Instruction Defs -*- tablegen-*-=//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file describes the PTX load/store instructions in TableGen format.
//
//===----------------------------------------------------------------------===//
// Addressing Predicates
// We have to differentiate between 32- and 64-bit pointer types
// Selection predicates keyed on the subtarget's is64Bit() query; exactly one
// of the two holds for a given subtarget.
def Use32BitAddresses : Predicate<"!getSubtarget().is64Bit()">;
def Use64BitAddresses : Predicate<"getSubtarget().is64Bit()">;
//===----------------------------------------------------------------------===//
// Pattern Fragments for Loads/Stores
//===----------------------------------------------------------------------===//
// Each fragment below matches a load (or store) whose source value is known,
// has pointer type, and lives in the named PTX state space. If the source
// value is missing or is not a pointer, the fragment does not match.

// Load from the global state space.
def load_global : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTXStateSpace::Global;
return false;
}]>;
// Load from the constant state space.
def load_constant : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTXStateSpace::Constant;
return false;
}]>;
// Load from the shared state space.
def load_shared : PatFrag<(ops node:$ptr), (load node:$ptr), [{
const Value *Src;
const PointerType *PT;
if ((Src = cast<LoadSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTXStateSpace::Shared;
return false;
}]>;
// Store to the global state space.
def store_global
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const Value *Src;
const PointerType *PT;
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTXStateSpace::Global;
return false;
}]>;
// Store to the shared state space.
def store_shared
: PatFrag<(ops node:$d, node:$ptr), (store node:$d, node:$ptr), [{
const Value *Src;
const PointerType *PT;
if ((Src = cast<StoreSDNode>(N)->getSrcValue()) &&
(PT = dyn_cast<PointerType>(Src->getType())))
return PT->getAddressSpace() == PTXStateSpace::Shared;
return false;
}]>;
// Addressing modes.
// Each complex pattern produces two operands and is matched by the named
// Select* routine; the 32/64 suffix selects the i32 or i64 pointer type.
def ADDRrr32 : ComplexPattern<i32, 2, "SelectADDRrr", [], []>;
def ADDRrr64 : ComplexPattern<i64, 2, "SelectADDRrr", [], []>;
def ADDRri32 : ComplexPattern<i32, 2, "SelectADDRri", [], []>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri", [], []>;
def ADDRii32 : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
def ADDRii64 : ComplexPattern<i64, 2, "SelectADDRii", [], []>;
def ADDRlocal32 : ComplexPattern<i32, 2, "SelectADDRlocal", [], []>;
def ADDRlocal64 : ComplexPattern<i64, 2, "SelectADDRlocal", [], []>;
// Address operands
// MEMri*: register base plus immediate, printed by printMemOperand.
def MEMri32 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops RegI32, i32imm);
}
def MEMri64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops RegI64, i64imm);
}
// LOCALri*: two immediates, printed by printMemOperand.
def LOCALri32 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
def LOCALri64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops i64imm, i64imm);
}
// MEMii*: two immediates, printed by printMemOperand.
def MEMii32 : Operand<i32> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops i32imm, i32imm);
}
def MEMii64 : Operand<i64> {
let PrintMethod = "printMemOperand";
let MIOperandInfo = (ops i64imm, i64imm);
}
// The operand here does not correspond to an actual address, so we
// can use i32 in 64-bit address modes.
def MEMpi : Operand<i32> {
let PrintMethod = "printParamOperand";
let MIOperandInfo = (ops i32imm);
}
// Single-immediate operand printed by printReturnOperand.
def MEMret : Operand<i32> {
let PrintMethod = "printReturnOperand";
let MIOperandInfo = (ops i32imm);
}
// Load/store .param space
// TableGen views of the PTXISD parameter opcodes. All four are chained and
// may carry incoming and outgoing glue.
// One result, one pointer-typed operand.
def PTXloadparam
: SDNode<"PTXISD::LOAD_PARAM", SDTypeProfile<1, 1, [SDTCisPtrTy<1>]>,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
// No result, two operands; operand 0 is constrained to i32.
def PTXstoreparam
: SDNode<"PTXISD::STORE_PARAM", SDTypeProfile<0, 2, [SDTCisVT<0, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
// One result, one i32 operand.
def PTXreadparam
: SDNode<"PTXISD::READ_PARAM", SDTypeProfile<1, 1, [SDTCisVT<1, i32>]>,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
// No result, one unconstrained operand.
def PTXwriteparam
: SDNode<"PTXISD::WRITE_PARAM", SDTypeProfile<0, 1, []>,
[SDNPHasChain, SDNPOutGlue, SDNPOptInGlue]>;
//===----------------------------------------------------------------------===//
// Classes for loads/stores
//===----------------------------------------------------------------------===//
// Load instructions for one value type. Instantiates six variants: three
// addressing modes (rr, ri, ii), each in a 32-bit and a 64-bit address
// flavour gated by Use32BitAddresses / Use64BitAddresses.
//   opstr    - mnemonic prefix (e.g. "ld.global")
//   typestr  - type suffix appended to the mnemonic (e.g. ".u32")
//   RC       - register class of the loaded value
//   pat_load - pattern fragment selecting which loads match
// Note that the rr variants use the same MEMri operand as the ri variants;
// only the address ComplexPattern differs.
multiclass PTX_LD<string opstr, string typestr,
RegisterClass RC, PatFrag pat_load> {
def rr32 : InstPTX<(outs RC:$d),
(ins MEMri32:$a),
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRrr32:$a))]>,
Requires<[Use32BitAddresses]>;
def rr64 : InstPTX<(outs RC:$d),
(ins MEMri64:$a),
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRrr64:$a))]>,
Requires<[Use64BitAddresses]>;
def ri32 : InstPTX<(outs RC:$d),
(ins MEMri32:$a),
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRri32:$a))]>,
Requires<[Use32BitAddresses]>;
def ri64 : InstPTX<(outs RC:$d),
(ins MEMri64:$a),
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRri64:$a))]>,
Requires<[Use64BitAddresses]>;
def ii32 : InstPTX<(outs RC:$d),
(ins MEMii32:$a),
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRii32:$a))]>,
Requires<[Use32BitAddresses]>;
def ii64 : InstPTX<(outs RC:$d),
(ins MEMii64:$a),
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (pat_load ADDRii64:$a))]>,
Requires<[Use64BitAddresses]>;
}
// Store instructions for one value type. Instantiates six variants: three
// addressing modes (rr, ri, ii), each in a 32-bit and a 64-bit address
// flavour gated by Use32BitAddresses / Use64BitAddresses.
//   opstr     - mnemonic prefix (e.g. "st.global")
//   typestr   - type suffix appended to the mnemonic (e.g. ".u32")
//   RC        - register class of the stored value
//   pat_store - pattern fragment selecting which stores match
// Stores produce no outputs; $d is the value and $a the address.
multiclass PTX_ST<string opstr, string typestr, RegisterClass RC,
PatFrag pat_store> {
def rr32 : InstPTX<(outs),
(ins RC:$d, MEMri32:$a),
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRrr32:$a)]>,
Requires<[Use32BitAddresses]>;
def rr64 : InstPTX<(outs),
(ins RC:$d, MEMri64:$a),
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRrr64:$a)]>,
Requires<[Use64BitAddresses]>;
def ri32 : InstPTX<(outs),
(ins RC:$d, MEMri32:$a),
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRri32:$a)]>,
Requires<[Use32BitAddresses]>;
def ri64 : InstPTX<(outs),
(ins RC:$d, MEMri64:$a),
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRri64:$a)]>,
Requires<[Use64BitAddresses]>;
def ii32 : InstPTX<(outs),
(ins RC:$d, MEMii32:$a),
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRii32:$a)]>,
Requires<[Use32BitAddresses]>;
def ii64 : InstPTX<(outs),
(ins RC:$d, MEMii64:$a),
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
[(pat_store RC:$d, ADDRii64:$a)]>,
Requires<[Use64BitAddresses]>;
}
// ld.local / st.local instructions for one value type, in 32- and 64-bit
// address flavours. Unlike PTX_LD/PTX_ST these carry no Requires<> predicate.
//   typestr - type suffix appended to the mnemonic
//   RC      - register class of the value
// NOTE(review): the patterns reuse the load_global / store_global fragments
// and rely on the ADDRlocal complex pattern to pick out local addresses --
// confirm this is intentional.
multiclass PTX_LOCAL_LD_ST<string typestr, RegisterClass RC> {
def LDri32 : InstPTX<(outs RC:$d), (ins LOCALri32:$a),
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (load_global ADDRlocal32:$a))]>;
def LDri64 : InstPTX<(outs RC:$d), (ins LOCALri64:$a),
!strconcat("ld.local", !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (load_global ADDRlocal64:$a))]>;
def STri32 : InstPTX<(outs), (ins RC:$d, LOCALri32:$a),
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
[(store_global RC:$d, ADDRlocal32:$a)]>;
def STri64 : InstPTX<(outs), (ins RC:$d, LOCALri64:$a),
!strconcat("st.local", !strconcat(typestr, "\t[$a], $d")),
[(store_global RC:$d, ADDRlocal64:$a)]>;
}
// ld.param / st.param instructions for one value type. The parameter is
// named by a target external symbol; both instructions are marked as having
// side effects.
//   typestr - type suffix appended to the mnemonic
//   RC      - register class of the value
multiclass PTX_PARAM_LD_ST<string typestr, RegisterClass RC> {
let hasSideEffects = 1 in {
def LDpi : InstPTX<(outs RC:$d), (ins i32imm:$a),
!strconcat("ld.param", !strconcat(typestr, "\t$d, [$a]")),
[(set RC:$d, (PTXloadparam texternalsym:$a))]>;
// For the store form, $d is the parameter symbol and $a the value.
def STpi : InstPTX<(outs), (ins i32imm:$d, RC:$a),
!strconcat("st.param", !strconcat(typestr, "\t[$d], $a")),
[(PTXstoreparam texternalsym:$d, RC:$a)]>;
}
}
// Instantiates PTX_LD for every supported value type (u16, u32, u64, f32,
// f64) with a common mnemonic prefix and load fragment.
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
defm u16 : PTX_LD<opstr, ".u16", RegI16, pat_load>;
defm u32 : PTX_LD<opstr, ".u32", RegI32, pat_load>;
defm u64 : PTX_LD<opstr, ".u64", RegI64, pat_load>;
defm f32 : PTX_LD<opstr, ".f32", RegF32, pat_load>;
defm f64 : PTX_LD<opstr, ".f64", RegF64, pat_load>;
}
// Instantiates PTX_ST for every supported value type (u16, u32, u64, f32,
// f64) with a common mnemonic prefix and store fragment.
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
defm u16 : PTX_ST<opstr, ".u16", RegI16, pat_store>;
defm u32 : PTX_ST<opstr, ".u32", RegI32, pat_store>;
defm u64 : PTX_ST<opstr, ".u64", RegI64, pat_store>;
defm f32 : PTX_ST<opstr, ".f32", RegF32, pat_store>;
defm f64 : PTX_ST<opstr, ".f64", RegF64, pat_store>;
}
//===----------------------------------------------------------------------===//
// Instruction definitions for loads/stores
//===----------------------------------------------------------------------===//
// Global/shared stores
// There is no constant-space store; only loads are defined for that space.
defm STg : PTX_ST_ALL<"st.global", store_global>;
defm STs : PTX_ST_ALL<"st.shared", store_shared>;
// Global/shared/constant loads
defm LDg : PTX_LD_ALL<"ld.global", load_global>;
defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
// Param loads/stores
// In addition to the integer and float types these cover predicate registers.
defm PARAMPRED : PTX_PARAM_LD_ST<".pred", RegPred>;
defm PARAMU16 : PTX_PARAM_LD_ST<".u16", RegI16>;
defm PARAMU32 : PTX_PARAM_LD_ST<".u32", RegI32>;
defm PARAMU64 : PTX_PARAM_LD_ST<".u64", RegI64>;
defm PARAMF32 : PTX_PARAM_LD_ST<".f32", RegF32>;
defm PARAMF64 : PTX_PARAM_LD_ST<".f64", RegF64>;
// Local loads/stores
// In addition to the integer and float types these cover predicate registers.
defm LOCALPRED : PTX_LOCAL_LD_ST<".pred", RegPred>;
defm LOCALU16 : PTX_LOCAL_LD_ST<".u16", RegI16>;
defm LOCALU32 : PTX_LOCAL_LD_ST<".u32", RegI32>;
defm LOCALU64 : PTX_LOCAL_LD_ST<".u64", RegI64>;
defm LOCALF32 : PTX_LOCAL_LD_ST<".f32", RegF32>;
defm LOCALF64 : PTX_LOCAL_LD_ST<".f64", RegF64>;

View File

@ -1,110 +0,0 @@
//===-- PTXIntrinsicInstrInfo.td - Defines PTX intrinsics --*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines all of the PTX-specific intrinsic instructions.
//
//===----------------------------------------------------------------------===//
// PTX Special Purpose Register Accessor Intrinsics
// Reads a 64-bit special register into a RegI64 via "mov.u64 $d, %<regname>".
//   regname - special-register name without the leading '%'
//   intop   - intrinsic that selects to this instruction
class PTX_READ_SPECIAL_REGISTER_R64<string regname, Intrinsic intop>
: InstPTX<(outs RegI64:$d), (ins),
!strconcat("mov.u64\t$d, %", regname),
[(set RegI64:$d, (intop))]>;
// Reads a 32-bit special register into a RegI32 via "mov.u32 $d, %<regname>".
//   regname - special-register name without the leading '%'
//   intop   - intrinsic that selects to this instruction
class PTX_READ_SPECIAL_REGISTER_R32<string regname, Intrinsic intop>
: InstPTX<(outs RegI32:$d), (ins),
!strconcat("mov.u32\t$d, %", regname),
[(set RegI32:$d, (intop))]>;
// One instruction per special-register intrinsic. Each def pairs a register
// name with the int_ptx_read_* intrinsic that selects to it.
// TODO: add vector versions of the special-register reads (the commented-out
// *_R64 defs below are placeholders for them).
//def PTX_READ_TID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"tid",
// int_ptx_read_tid_r64>;
// %tid components
def PTX_READ_TID_X : PTX_READ_SPECIAL_REGISTER_R32<"tid.x",
int_ptx_read_tid_x>;
def PTX_READ_TID_Y : PTX_READ_SPECIAL_REGISTER_R32<"tid.y",
int_ptx_read_tid_y>;
def PTX_READ_TID_Z : PTX_READ_SPECIAL_REGISTER_R32<"tid.z",
int_ptx_read_tid_z>;
def PTX_READ_TID_W : PTX_READ_SPECIAL_REGISTER_R32<"tid.w",
int_ptx_read_tid_w>;
//def PTX_READ_NTID_R64 : PTX_READ_SPECIAL_REGISTER_R64<"ntid",
// int_ptx_read_ntid_r64>;
// %ntid components
def PTX_READ_NTID_X : PTX_READ_SPECIAL_REGISTER_R32<"ntid.x",
int_ptx_read_ntid_x>;
def PTX_READ_NTID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ntid.y",
int_ptx_read_ntid_y>;
def PTX_READ_NTID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ntid.z",
int_ptx_read_ntid_z>;
def PTX_READ_NTID_W : PTX_READ_SPECIAL_REGISTER_R32<"ntid.w",
int_ptx_read_ntid_w>;
// %laneid, %warpid, %nwarpid
def PTX_READ_LANEID : PTX_READ_SPECIAL_REGISTER_R32<"laneid",
int_ptx_read_laneid>;
def PTX_READ_WARPID : PTX_READ_SPECIAL_REGISTER_R32<"warpid",
int_ptx_read_warpid>;
def PTX_READ_NWARPID : PTX_READ_SPECIAL_REGISTER_R32<"nwarpid",
int_ptx_read_nwarpid>;
//def PTX_READ_CTAID_R64 :
//PTX_READ_SPECIAL_REGISTER_R64<"ctaid", int_ptx_read_ctaid_r64>;
// %ctaid components
def PTX_READ_CTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.x",
int_ptx_read_ctaid_x>;
def PTX_READ_CTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.y",
int_ptx_read_ctaid_y>;
def PTX_READ_CTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.z",
int_ptx_read_ctaid_z>;
def PTX_READ_CTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"ctaid.w",
int_ptx_read_ctaid_w>;
//def PTX_READ_NCTAID_R64 :
//PTX_READ_SPECIAL_REGISTER_R64<"nctaid", int_ptx_read_nctaid_r64>;
// %nctaid components
def PTX_READ_NCTAID_X : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.x",
int_ptx_read_nctaid_x>;
def PTX_READ_NCTAID_Y : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.y",
int_ptx_read_nctaid_y>;
def PTX_READ_NCTAID_Z : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.z",
int_ptx_read_nctaid_z>;
def PTX_READ_NCTAID_W : PTX_READ_SPECIAL_REGISTER_R32<"nctaid.w",
int_ptx_read_nctaid_w>;
// %smid, %nsmid, %gridid
def PTX_READ_SMID : PTX_READ_SPECIAL_REGISTER_R32<"smid",
int_ptx_read_smid>;
def PTX_READ_NSMID : PTX_READ_SPECIAL_REGISTER_R32<"nsmid",
int_ptx_read_nsmid>;
def PTX_READ_GRIDID : PTX_READ_SPECIAL_REGISTER_R32<"gridid",
int_ptx_read_gridid>;
// %lanemask_* registers
def PTX_READ_LANEMASK_EQ
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_eq", int_ptx_read_lanemask_eq>;
def PTX_READ_LANEMASK_LE
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_le", int_ptx_read_lanemask_le>;
def PTX_READ_LANEMASK_LT
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_lt", int_ptx_read_lanemask_lt>;
def PTX_READ_LANEMASK_GE
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_ge", int_ptx_read_lanemask_ge>;
def PTX_READ_LANEMASK_GT
: PTX_READ_SPECIAL_REGISTER_R32<"lanemask_gt", int_ptx_read_lanemask_gt>;
// %clock (32-bit) and %clock64 (64-bit; the only R64 reader in use)
def PTX_READ_CLOCK
: PTX_READ_SPECIAL_REGISTER_R32<"clock", int_ptx_read_clock>;
def PTX_READ_CLOCK64
: PTX_READ_SPECIAL_REGISTER_R64<"clock64", int_ptx_read_clock64>;
// %pm0 .. %pm3
def PTX_READ_PM0 : PTX_READ_SPECIAL_REGISTER_R32<"pm0", int_ptx_read_pm0>;
def PTX_READ_PM1 : PTX_READ_SPECIAL_REGISTER_R32<"pm1", int_ptx_read_pm1>;
def PTX_READ_PM2 : PTX_READ_SPECIAL_REGISTER_R32<"pm2", int_ptx_read_pm2>;
def PTX_READ_PM3 : PTX_READ_SPECIAL_REGISTER_R32<"pm3", int_ptx_read_pm3>;
// PTX Parallel Synchronization and Communication Intrinsics
// Matches int_ptx_bar_sync applied to an immediate and prints
// "bar.sync <imm>". The instruction has one i32 immediate input and no outputs.
def PTX_BAR_SYNC : InstPTX<(outs), (ins i32imm:$i), "bar.sync\t$i",
[(int_ptx_bar_sync imm:$i)]>;

View File

@ -1,556 +0,0 @@
//===-- PTXMCAsmStreamer.cpp - PTX Text Assembly Output -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "llvm/ADT/OwningPtr.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Twine.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCContext.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstPrinter.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/PathV2.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
namespace {
class PTXMCAsmStreamer : public MCStreamer {
formatted_raw_ostream &OS;
const MCAsmInfo &MAI;
OwningPtr<MCInstPrinter> InstPrinter;
OwningPtr<MCCodeEmitter> Emitter;
SmallString<128> CommentToEmit;
raw_svector_ostream CommentStream;
unsigned IsVerboseAsm : 1;
unsigned ShowInst : 1;
public:
PTXMCAsmStreamer(MCContext &Context,
formatted_raw_ostream &os,
bool isVerboseAsm, bool useLoc,
MCInstPrinter *printer,
MCCodeEmitter *emitter,
bool showInst)
: MCStreamer(Context), OS(os), MAI(Context.getAsmInfo()),
InstPrinter(printer), Emitter(emitter), CommentStream(CommentToEmit),
IsVerboseAsm(isVerboseAsm),
ShowInst(showInst) {
if (InstPrinter && IsVerboseAsm)
InstPrinter->setCommentStream(CommentStream);
}
~PTXMCAsmStreamer() {}
inline void EmitEOL() {
// If we don't have any comments, just emit a \n.
if (!IsVerboseAsm) {
OS << '\n';
return;
}
EmitCommentsAndEOL();
}
void EmitCommentsAndEOL();
/// isVerboseAsm - Return true if this streamer supports verbose assembly at
/// all.
virtual bool isVerboseAsm() const { return IsVerboseAsm; }
/// hasRawTextSupport - We support EmitRawText.
virtual bool hasRawTextSupport() const { return true; }
/// AddComment - Add a comment that can be emitted to the generated .s
/// file if applicable as a QoI issue to make the output of the compiler
/// more readable. This only affects the MCAsmStreamer, and only when
/// verbose assembly output is enabled.
virtual void AddComment(const Twine &T);
/// AddEncodingComment - Add a comment showing the encoding of an instruction.
virtual void AddEncodingComment(const MCInst &Inst);
/// GetCommentOS - Return a raw_ostream that comments can be written to.
/// Unlike AddComment, you are required to terminate comments with \n if you
/// use this method.
virtual raw_ostream &GetCommentOS() {
if (!IsVerboseAsm)
return nulls(); // Discard comments unless in verbose asm mode.
return CommentStream;
}
/// AddBlankLine - Emit a blank line to a .s file to pretty it up.
virtual void AddBlankLine() {
EmitEOL();
}
/// @name MCStreamer Interface
/// @{
virtual void ChangeSection(const MCSection *Section);
virtual void InitSections() { /* PTX does not use sections */ }
virtual void EmitLabel(MCSymbol *Symbol);
virtual void EmitAssemblerFlag(MCAssemblerFlag Flag);
virtual void EmitThumbFunc(MCSymbol *Func);
virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol);
virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
const MCSymbol *Label,
unsigned PointerSize);
virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
virtual void EmitCOFFSymbolStorageClass(int StorageClass);
virtual void EmitCOFFSymbolType(int Type);
virtual void EndCOFFSymbolDef();
virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value);
virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
/// EmitLocalCommonSymbol - Emit a local common (.lcomm) symbol.
///
/// @param Symbol - The common symbol to emit.
/// @param Size - The size of the common symbol.
/// @param ByteAlignment - The alignment of the common symbol in bytes.
virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment);
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment = 0);
virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
unsigned AddrSpace);
virtual void EmitULEB128Value(const MCExpr *Value);
virtual void EmitSLEB128Value(const MCExpr *Value);
virtual void EmitGPRel32Value(const MCExpr *Value);
virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
unsigned AddrSpace);
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
virtual void EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit = 0);
virtual bool EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
virtual void EmitFileDirective(StringRef Filename);
virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory,
StringRef Filename);
virtual void EmitInstruction(const MCInst &Inst);
/// EmitRawText - If this file is backed by an assembly streamer, this dumps
/// the specified string in the output .s file. This capability is
/// indicated by the hasRawTextSupport() predicate.
virtual void EmitRawText(StringRef String);
virtual void FinishImpl();
/// @}
}; // class PTXMCAsmStreamer
}
/// TODO: Add appropriate implementation of Emit*() methods when needed
/// AddComment - Append T, followed by a newline, to the pending comment
/// buffer.  Nothing is recorded unless verbose assembly output is enabled.
void PTXMCAsmStreamer::AddComment(const Twine &T) {
  if (IsVerboseAsm) {
    // Push anything still held by the stream into the buffer before the
    // buffer is appended to directly.
    CommentStream.flush();

    T.toVector(CommentToEmit);
    CommentToEmit.push_back('\n');  // One comment per line.

    // The buffer was modified behind the stream's back; let it catch up.
    CommentStream.resync();
  }
}
void PTXMCAsmStreamer::EmitCommentsAndEOL() {
if (CommentToEmit.empty() && CommentStream.GetNumBytesInBuffer() == 0) {
OS << '\n';
return;
}
CommentStream.flush();
StringRef Comments = CommentToEmit.str();
assert(Comments.back() == '\n' &&
"Comment array not newline terminated");
do {
// Emit a line of comments.
OS.PadToColumn(MAI.getCommentColumn());
size_t Position = Comments.find('\n');
OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
Comments = Comments.substr(Position+1);
} while (!Comments.empty());
CommentToEmit.clear();
// Tell the comment stream that the vector changed underneath it.
CommentStream.resync();
}
/// truncateToSize - Return Value with everything above its low Bytes bytes
/// cleared.  Bytes must be in the range [1, 8].
static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
  // Bytes == 0 would shift by 64 and Bytes > 8 would produce a negative shift
  // count; both are undefined behaviour, so both are rejected here.
  assert(Bytes > 0 && Bytes <= 8 && "Invalid size!");
  return Value & ((uint64_t) (int64_t) -1 >> (64 - Bytes * 8));
}
// ChangeSection - Writes nothing to the output; the only action is to check
// (when assertions are enabled) that Section is non-null.
void PTXMCAsmStreamer::ChangeSection(const MCSection *Section) {
assert(Section && "Cannot switch to a null section!");
}
/// EmitLabel - Print Symbol followed by the target's label suffix, end the
/// line, and record the current section on the symbol.
void PTXMCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
  // Preconditions: a fresh, non-variable symbol and an active section.
  assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
  assert(!Symbol->isVariable() && "Cannot emit a variable symbol!");
  assert(getCurrentSection() && "Cannot emit before setting section!");

  OS << *Symbol << MAI.getLabelSuffix();
  EmitEOL();

  Symbol->setSection(*getCurrentSection());
}
// Assembler flags and Thumb function markers are accepted and ignored; both
// bodies are empty.
void PTXMCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {}
void PTXMCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {}
/// EmitAssignment - Print "Symbol = Value" on its own line and record Value
/// as the symbol's variable value.
void PTXMCAsmStreamer::EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) {
  OS << *Symbol << " = " << *Value;
  EmitEOL();

  // FIXME: Lift context changes into super class.
  Symbol->setVariableValue(Value);
}
/// EmitWeakReference - Print a ".weakref Alias, Symbol" line.
void PTXMCAsmStreamer::EmitWeakReference(MCSymbol *Alias,
                                         const MCSymbol *Symbol) {
  OS << ".weakref " << *Alias << ", " << *Symbol;
  EmitEOL();
}
// EmitDwarfAdvanceLineAddr - Not implemented for this streamer: every call
// ends in report_fatal_error and none of the arguments are examined.
void PTXMCAsmStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta,
const MCSymbol *LastLabel,
const MCSymbol *Label,
unsigned PointerSize) {
report_fatal_error("Unimplemented.");
}
// The MCStreamer hooks below are deliberate no-ops in this streamer: every
// body is empty, so the arguments are ignored and nothing is written.

// Symbol attributes and descriptors.
void PTXMCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
MCSymbolAttr Attribute) {}
void PTXMCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {}
// COFF symbol definitions.
void PTXMCAsmStreamer::BeginCOFFSymbolDef(const MCSymbol *Symbol) {}
void PTXMCAsmStreamer::EmitCOFFSymbolStorageClass (int StorageClass) {}
void PTXMCAsmStreamer::EmitCOFFSymbolType (int Type) {}
void PTXMCAsmStreamer::EndCOFFSymbolDef() {}
// ELF symbol size.
void PTXMCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {}
// Common, local-common, zerofill and TBSS symbols.
void PTXMCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
void PTXMCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
unsigned ByteAlignment) {}
void PTXMCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
unsigned Size, unsigned ByteAlignment) {}
void PTXMCAsmStreamer::EmitTBSSSymbol(const MCSection *Section,
MCSymbol *Symbol,
uint64_t Size, unsigned ByteAlignment) {}
/// toOctal - Return the ASCII digit ('0'..'7') for the low three bits of X.
static inline char toOctal(int X) {
  const int Digit = X & 7;
  return '0' + Digit;
}
/// PrintQuotedString - Write Data to OS enclosed in double quotes.  Quotes and
/// backslashes are backslash-escaped, other printable characters are copied
/// through, and everything else becomes either a named escape (\b \f \n \r
/// \t) or a three-digit octal escape.
static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
  OS << '"';

  for (unsigned Idx = 0, End = Data.size(); Idx != End; ++Idx) {
    const unsigned char Ch = Data[Idx];

    if (Ch == '"' || Ch == '\\') {
      OS << '\\' << (char)Ch;
    } else if (isprint((unsigned char)Ch)) {
      OS << (char)Ch;
    } else {
      switch (Ch) {
      case '\b': OS << "\\b"; break;
      case '\f': OS << "\\f"; break;
      case '\n': OS << "\\n"; break;
      case '\r': OS << "\\r"; break;
      case '\t': OS << "\\t"; break;
      default:
        // Three octal digits, most significant first.
        OS << '\\';
        OS << toOctal(Ch >> 6);
        OS << toOctal(Ch >> 3);
        OS << toOctal(Ch >> 0);
        break;
      }
    }
  }

  OS << '"';
}
/// EmitBytes - Emit Data as text.  A single byte is printed as a number after
/// the 8-bit data directive; longer data is printed as a quoted string after
/// the .asciz directive (when available and the data ends in NUL, which is
/// then dropped) or the .ascii directive.  Empty data emits nothing.
void PTXMCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
  assert(getCurrentSection() && "Cannot emit contents before setting section!");

  const size_t Length = Data.size();
  if (Length == 0)
    return;

  if (Length == 1) {
    OS << MAI.getData8bitsDirective(AddrSpace);
    OS << (unsigned)(unsigned char)Data[0];
    EmitEOL();
    return;
  }

  // Use .asciz when the target has it and the data is NUL-terminated;
  // otherwise use .ascii.
  const bool UseAsciz = MAI.getAscizDirective() && Data.back() == 0;
  if (UseAsciz) {
    OS << MAI.getAscizDirective();
    Data = Data.substr(0, Data.size()-1);
  } else {
    OS << MAI.getAsciiDirective();
  }

  OS << ' ';
  PrintQuotedString(Data, OS);
  EmitEOL();
}
/// EmitValueImpl - Print Value after the data directive for its Size (1, 2, 4
/// or 8 bytes).  If the target has no 64-bit directive, an 8-byte value must
/// evaluate to a constant and is emitted as two 32-bit halves, ordered by the
/// target's endianness.
void PTXMCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
                                     unsigned AddrSpace) {
  assert(getCurrentSection() && "Cannot emit contents before setting section!");

  const char *Directive = 0;
  if (Size == 1) {
    Directive = MAI.getData8bitsDirective(AddrSpace);
  } else if (Size == 2) {
    Directive = MAI.getData16bitsDirective(AddrSpace);
  } else if (Size == 4) {
    Directive = MAI.getData32bitsDirective(AddrSpace);
  } else if (Size == 8) {
    Directive = MAI.getData64bitsDirective(AddrSpace);
    if (!Directive) {
      // If the target doesn't support 64-bit data, emit as two 32-bit halves.
      int64_t IntValue;
      if (!Value->EvaluateAsAbsolute(IntValue))
        report_fatal_error("Don't know how to emit this value.");

      const uint32_t LowHalf = (uint32_t)(IntValue >> 0 );
      const uint32_t HighHalf = (uint32_t)(IntValue >> 32);
      if (getContext().getAsmInfo().isLittleEndian()) {
        EmitIntValue(LowHalf, 4, AddrSpace);
        EmitIntValue(HighHalf, 4, AddrSpace);
      } else {
        EmitIntValue(HighHalf, 4, AddrSpace);
        EmitIntValue(LowHalf, 4, AddrSpace);
      }
      return;
    }
  }

  assert(Directive && "Invalid size for machine code value!");
  OS << Directive << *Value;
  EmitEOL();
}
/// EmitULEB128Value - Print a ".uleb128 Value" line.  The target must report
/// LEB128 support.
void PTXMCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
  assert(MAI.hasLEB128() && "Cannot print a .uleb");

  OS << ".uleb128 " << *Value;
  EmitEOL();
}
/// EmitSLEB128Value - Print a ".sleb128 Value" line.  The target must report
/// LEB128 support.
void PTXMCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
  assert(MAI.hasLEB128() && "Cannot print a .sleb");

  OS << ".sleb128 " << *Value;
  EmitEOL();
}
/// EmitGPRel32Value - Print Value after the target's GP-relative 32-bit
/// directive, which must exist.
void PTXMCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
  assert(MAI.getGPRel32Directive() != 0);

  OS << MAI.getGPRel32Directive() << *Value;
  EmitEOL();
}
/// EmitFill - Emit NumBytes bytes, each holding FillValue.  In address space
/// 0 the target's zero directive is used when it has one (with the fill value
/// appended if it is non-zero); otherwise the base-class implementation is
/// used.  This implements directives such as '.space'.
void PTXMCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
                                unsigned AddrSpace) {
  if (NumBytes == 0)
    return;

  // The zero directive is only consulted for address space 0.
  const char *ZeroDirective = (AddrSpace == 0) ? MAI.getZeroDirective() : 0;
  if (!ZeroDirective) {
    // Emit a byte at a time.
    MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
    return;
  }

  OS << ZeroDirective << NumBytes;
  if (FillValue != 0)
    OS << ',' << (int)FillValue;
  EmitEOL();
}
/// EmitValueToAlignment - Print an alignment directive.  Power-of-two
/// alignments use the target's align directive (or .p2alignw / .p2alignl for
/// 2- and 4-byte fill values); all others use the .balign family.  A
/// ValueSize of 8, or any size other than 1, 2 or 4, is unreachable.
void PTXMCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment,
                                            int64_t Value,
                                            unsigned ValueSize,
                                            unsigned MaxBytesToEmit) {
  if (!isPowerOf2_32(ByteAlignment)) {
    // Non-power of two alignment.  This is not widely supported by
    // assemblers.
    // FIXME: Parameterize this based on MAI.
    switch (ValueSize) {
    default: llvm_unreachable("Invalid size for machine code value!");
    case 1: OS << ".balign";  break;
    case 2: OS << ".balignw"; break;
    case 4: OS << ".balignl"; break;
    case 8: llvm_unreachable("Unsupported alignment size!");
    }

    OS << ' ' << ByteAlignment;
    OS << ", " << truncateToSize(Value, ValueSize);
    if (MaxBytesToEmit)
      OS << ", " << MaxBytesToEmit;
    EmitEOL();
    return;
  }

  // Some assemblers don't support non-power of two alignments, so alignments
  // are emitted as a power of two whenever possible.
  switch (ValueSize) {
  default: llvm_unreachable("Invalid size for machine code value!");
  case 1: OS << MAI.getAlignDirective(); break;
  // FIXME: use MAI for this!
  case 2: OS << ".p2alignw "; break;
  case 4: OS << ".p2alignl "; break;
  case 8: llvm_unreachable("Unsupported alignment size!");
  }

  // The operand is either the byte count or its log2, as the target requires.
  if (MAI.getAlignmentIsInBytes())
    OS << ByteAlignment;
  else
    OS << Log2_32(ByteAlignment);

  if (Value || MaxBytesToEmit) {
    OS << ", 0x";
    OS.write_hex(truncateToSize(Value, ValueSize));

    if (MaxBytesToEmit)
      OS << ", " << MaxBytesToEmit;
  }
  EmitEOL();
}
// EmitCodeAlignment - No-op: the body is empty, so nothing is written.
void PTXMCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
unsigned MaxBytesToEmit) {}
// EmitValueToOffset - Writes nothing and always returns false.
bool PTXMCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {return false;}
/// EmitFileDirective - Print a tab-indented ".file" directive whose operand is
/// the quoted Filename.  The target must support the single-parameter form.
void PTXMCAsmStreamer::EmitFileDirective(StringRef Filename) {
  assert(MAI.hasSingleParameterDotFile());

  OS << "\t.file\t";
  PrintQuotedString(Filename, OS);
  EmitEOL();
}
// FIXME: should we inherit from MCAsmStreamer?
/// EmitDwarfFileDirective - Print a numbered ".file" directive and forward
/// the call to the MCStreamer base implementation.  A non-empty Directory is
/// folded into the file name first (unless the name is already absolute), and
/// the directive is then emitted with an empty directory.
bool PTXMCAsmStreamer::EmitDwarfFileDirective(unsigned FileNo,
                                              StringRef Directory,
                                              StringRef Filename) {
  if (Directory.empty()) {
    OS << "\t.file\t" << FileNo << ' ';
    PrintQuotedString(Filename, OS);
    EmitEOL();
    return this->MCStreamer::EmitDwarfFileDirective(FileNo, Directory, Filename);
  }

  // An absolute file name needs no directory prefix.
  if (sys::path::is_absolute(Filename))
    return EmitDwarfFileDirective(FileNo, "", Filename);

  SmallString<128> FullPathName = Directory;
  sys::path::append(FullPathName, Filename);
  return EmitDwarfFileDirective(FileNo, "", FullPathName);
}
// AddEncodingComment - No-op: the body is empty, so no encoding comment is
// produced for Inst.
void PTXMCAsmStreamer::AddEncodingComment(const MCInst &Inst) {}
void PTXMCAsmStreamer::EmitInstruction(const MCInst &Inst) {
assert(getCurrentSection() && "Cannot emit contents before setting section!");
// Show the encoding in a comment if we have a code emitter.
if (Emitter)
AddEncodingComment(Inst);
// Show the MCInst if enabled.
if (ShowInst) {
Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
GetCommentOS() << "\n";
}
// If we have an AsmPrinter, use that to print, otherwise print the MCInst.
if (InstPrinter)
InstPrinter->printInst(&Inst, OS, "");
else
Inst.print(OS, &MAI);
EmitEOL();
}
/// EmitRawText - Write String to the output followed by an end of line.  A
/// single trailing '\n' in String is dropped first, because EmitEOL supplies
/// the line terminator.  Availability of this capability is reported by
/// hasRawTextSupport().
void PTXMCAsmStreamer::EmitRawText(StringRef String) {
  const bool EndsWithNewline = !String.empty() && String.back() == '\n';
  if (EndsWithNewline)
    String = String.substr(0, String.size()-1);

  OS << String;
  EmitEOL();
}
// FinishImpl - No-op: the body is empty, so nothing is written here.
void PTXMCAsmStreamer::FinishImpl() {}
namespace llvm {
  /// createPTXAsmStreamer - Allocate a new PTXMCAsmStreamer and return it as
  /// an MCStreamer.  useCFI, useDwarfDirectory and MAB are accepted but are
  /// not forwarded to the streamer.
  MCStreamer *createPTXAsmStreamer(MCContext &Context,
                                   formatted_raw_ostream &OS,
                                   bool isVerboseAsm, bool useLoc, bool useCFI,
                                   bool useDwarfDirectory,
                                   MCInstPrinter *IP,
                                   MCCodeEmitter *CE, MCAsmBackend *MAB,
                                   bool ShowInst) {
    MCStreamer *Streamer =
      new PTXMCAsmStreamer(Context, OS, isVerboseAsm, useLoc,
                           IP, CE, ShowInst);
    return Streamer;
  }
}

View File

@ -1,32 +0,0 @@
//===-- PTXMCInstLower.cpp - Convert PTX MachineInstr to an MCInst --------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains code to lower PTX MachineInstrs to their corresponding
// MCInst records.
//
//===----------------------------------------------------------------------===//
#include "PTX.h"
#include "PTXAsmPrinter.h"
#include "llvm/Constants.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/Target/Mangler.h"
/// LowerPTXMachineInstrToMCInst - Translate MI into OutMI.  The opcode is
/// copied unchanged and every operand of MI, in order, is lowered with
/// PTXAsmPrinter::lowerOperand and appended to OutMI.
void llvm::LowerPTXMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
                                        PTXAsmPrinter &AP) {
  OutMI.setOpcode(MI->getOpcode());
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    OutMI.addOperand(AP.lowerOperand(MO));
  }
}

View File

@ -1,85 +0,0 @@
//===-- PTXMFInfoExtract.cpp - Extract PTX machine function info ----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines an information extractor for PTX machine functions.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-mf-info-extract"
#include "PTX.h"
#include "PTXTargetMachine.h"
#include "PTXMachineFunctionInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
// NOTE: PTXMFInfoExtract must run after register allocation!
namespace {
  /// PTXMFInfoExtract - PTX-specific machine function pass that extracts
  /// machine function information for use by PTXAsmPrinter.
  ///
  class PTXMFInfoExtract : public MachineFunctionPass {
    private:
      static char ID;

    public:
      /// Neither TM nor OptLevel is stored; only the pass ID is handed to the
      /// base class.
      PTXMFInfoExtract(PTXTargetMachine &TM, CodeGenOpt::Level OptLevel)
        : MachineFunctionPass(ID) {}

      virtual bool runOnMachineFunction(MachineFunction &MF);

      virtual const char *getPassName() const {
        return "PTX Machine Function Info Extractor";
      }
  }; // class PTXMFInfoExtract
} // end anonymous namespace
using namespace llvm;
// Storage for the pass ID whose address-independent value (0) is passed to the
// MachineFunctionPass constructor by PTXMFInfoExtract.
char PTXMFInfoExtract::ID = 0;
/// runOnMachineFunction - Register every virtual register of MF with the
/// function's PTXMachineFunctionInfo, tagged with the PTX register type that
/// corresponds to its register class and with the Reg register space.
///
/// \returns false; the machine function itself is not modified.
bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
  PTXMachineFunctionInfo *MFI = MF.getInfo<PTXMachineFunctionInfo>();
  MachineRegisterInfo &MRI = MF.getRegInfo();

  // Generate list of all virtual registers used in this function.  The loop
  // creates no virtual registers, so the count is read once up front.
  for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) {
    unsigned Reg = TargetRegisterInfo::index2VirtReg(i);
    const TargetRegisterClass *TRC = MRI.getRegClass(Reg);

    // Map the register class onto the PTX register type.
    unsigned RegType;
    if (TRC == &PTX::RegPredRegClass)
      RegType = PTXRegisterType::Pred;
    else if (TRC == &PTX::RegI16RegClass)
      RegType = PTXRegisterType::B16;
    else if (TRC == &PTX::RegI32RegClass)
      RegType = PTXRegisterType::B32;
    else if (TRC == &PTX::RegI64RegClass)
      RegType = PTXRegisterType::B64;
    else if (TRC == &PTX::RegF32RegClass)
      RegType = PTXRegisterType::F32;
    else if (TRC == &PTX::RegF64RegClass)
      RegType = PTXRegisterType::F64;
    else
      llvm_unreachable("Unknown register class.");

    MFI->addRegister(Reg, RegType, PTXRegisterSpace::Reg);
  }

  return false;
}
// createPTXMFInfoExtract - Allocate a new PTXMFInfoExtract pass with the given
// arguments and return it as a FunctionPass pointer.
FunctionPass *llvm::createPTXMFInfoExtract(PTXTargetMachine &TM,
CodeGenOpt::Level OptLevel) {
return new PTXMFInfoExtract(TM, OptLevel);
}

View File

@ -1,14 +0,0 @@
//===-- PTXMachineFunctionInfo.cpp - PTX machine function info ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "PTXMachineFunctionInfo.h"
using namespace llvm;
// Out-of-line, empty definition of anchor().
// NOTE(review): presumably this exists so the class has one non-inline virtual
// method defined in this file -- confirm against the class declaration.
void PTXMachineFunctionInfo::anchor() { }

View File

@ -1,202 +0,0 @@
//===-- PTXMachineFunctionInfo.h - PTX machine function info -----*- C++ -*-==//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares PTX-specific per-machine-function information.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_MACHINE_FUNCTION_INFO_H
#define PTX_MACHINE_FUNCTION_INFO_H
#include "PTX.h"
#include "PTXParamManager.h"
#include "PTXRegisterInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include <map>
#include <string>
namespace llvm {

/// PTXMachineFunctionInfo - This class is derived from MachineFunctionInfo and
/// contains private PTX target-specific information for each MachineFunction.
///
class PTXMachineFunctionInfo : public MachineFunctionInfo {
  virtual void anchor();

  bool IsKernel;
  DenseSet<unsigned> RegArgs;
  DenseSet<unsigned> RegRets;

  // Frame-symbol names are handed out as raw C strings (see getFrameSymbol),
  // so the container must never relocate its values.  A std::map keeps every
  // node in place across insertions, whereas a DenseMap moves its values
  // whenever it grows.
  typedef std::map<int, std::string> FrameMap;

  FrameMap FrameSymbols;

  // Everything recorded about one register by addRegister().
  struct RegisterInfo {
    unsigned Reg;
    unsigned Type;
    unsigned Space;
    unsigned Offset;
    unsigned Encoded;
  };

  typedef DenseMap<unsigned, RegisterInfo> RegisterInfoMap;

  RegisterInfoMap RegInfo;

  PTXParamManager ParamManager;

public:
  typedef DenseSet<unsigned>::const_iterator reg_iterator;

  /// The new object starts out as a non-kernel function; MF is not used.
  PTXMachineFunctionInfo(MachineFunction &MF)
    : IsKernel(false) {
  }

  /// getParamManager - Returns the PTXParamManager instance for this function.
  PTXParamManager& getParamManager() { return ParamManager; }
  const PTXParamManager& getParamManager() const { return ParamManager; }

  /// setKernel/isKernel - Gets/sets a flag that indicates if this function is
  /// a PTX kernel function.
  void setKernel(bool _IsKernel=true) { IsKernel = _IsKernel; }
  bool isKernel() const { return IsKernel; }

  /// argreg_begin/argreg_end - Returns iterators to the set of registers
  /// containing function arguments.
  reg_iterator argreg_begin() const { return RegArgs.begin(); }
  reg_iterator argreg_end()   const { return RegArgs.end(); }

  /// retreg_begin/retreg_end - Returns iterators to the set of registers
  /// containing the function return values.
  reg_iterator retreg_begin() const { return RegRets.begin(); }
  reg_iterator retreg_end()   const { return RegRets.end(); }

  /// addRegister - Adds a virtual register to the set of all used registers.
  /// A register that is already known is left untouched.
  void addRegister(unsigned Reg, unsigned RegType, unsigned RegSpace) {
    if (!RegInfo.count(Reg)) {
      RegisterInfo Info;
      Info.Reg = Reg;
      Info.Type = RegType;
      Info.Space = RegSpace;

      // Determine register offset: the number of registers already recorded
      // in the same space (and, within the Reg space, of the same type).
      Info.Offset = 0;
      for(RegisterInfoMap::const_iterator i = RegInfo.begin(),
          e = RegInfo.end(); i != e; ++i) {
        const RegisterInfo& RI = i->second;
        if (RI.Space == RegSpace)
          if (RI.Space != PTXRegisterSpace::Reg || RI.Type == Info.Type)
            Info.Offset++;
      }

      // Encode the register data into a single register number.
      // NOTE(review): the layout leaves three bits each for Type and Space --
      // confirm the enumerations stay below 8.
      Info.Encoded = (Info.Offset << 6) | (Info.Type << 3) | Info.Space;

      RegInfo[Reg] = Info;

      if (RegSpace == PTXRegisterSpace::Argument)
        RegArgs.insert(Reg);
      else if (RegSpace == PTXRegisterSpace::Return)
        RegRets.insert(Reg);
    }
  }

  /// countRegisters - Returns the number of registers of the given type and
  /// space.
  unsigned countRegisters(unsigned RegType, unsigned RegSpace) const {
    unsigned Count = 0;
    for(RegisterInfoMap::const_iterator i = RegInfo.begin(), e = RegInfo.end();
        i != e; ++i) {
      const RegisterInfo& RI = i->second;
      if (RI.Type == RegType && RI.Space == RegSpace)
        Count++;
    }
    return Count;
  }

  /// getEncodedRegister - Returns the encoded value of the register.
  unsigned getEncodedRegister(unsigned Reg) const {
    return RegInfo.lookup(Reg).Encoded;
  }

  /// addRetReg - Adds a register to the set of return-value registers.
  void addRetReg(unsigned Reg) {
    if (!RegRets.count(Reg)) {
      RegRets.insert(Reg);
    }
  }

  /// addArgReg - Adds a register to the set of function argument registers.
  void addArgReg(unsigned Reg) {
    RegArgs.insert(Reg);
  }

  /// getRegisterName - Returns the name of the specified virtual register. This
  /// name is used during PTX emission.
  std::string getRegisterName(unsigned Reg) const {
    if (RegInfo.count(Reg)) {
      const RegisterInfo& RI = RegInfo.lookup(Reg);
      std::string Name;
      raw_string_ostream NameStr(Name);
      decodeRegisterName(NameStr, RI.Encoded);
      NameStr.flush();
      return Name;
    }
    else if (Reg == PTX::NoRegister)
      return "%noreg";
    else
      llvm_unreachable("Register not in register name map");
  }

  /// getEncodedRegisterName - Returns the name of the encoded register.
  std::string getEncodedRegisterName(unsigned EncodedReg) const {
    std::string Name;
    raw_string_ostream NameStr(Name);
    decodeRegisterName(NameStr, EncodedReg);
    NameStr.flush();
    return Name;
  }

  /// getRegisterType - Returns the type of the specified virtual register.
  unsigned getRegisterType(unsigned Reg) const {
    if (RegInfo.count(Reg))
      return RegInfo.lookup(Reg).Type;
    else
      llvm_unreachable("Unknown register");
  }

  /// getOffsetForRegister - Returns the offset of the virtual register, or 0
  /// if the register has not been added.
  unsigned getOffsetForRegister(unsigned Reg) const {
    if (RegInfo.count(Reg))
      return RegInfo.lookup(Reg).Offset;
    else
      return 0;
  }

  /// getFrameSymbol - Returns the symbol name ("__local<FrameIndex>") for the
  /// given FrameIndex.  The returned pointer refers to a string owned by this
  /// object and stays valid for as long as the object exists.
  const char* getFrameSymbol(int FrameIndex) {
    FrameMap::iterator Entry = FrameSymbols.find(FrameIndex);
    if (Entry == FrameSymbols.end()) {
      std::string Name = "__local";
      Name += utostr(FrameIndex);
      // The whole point of caching this name is to ensure the pointer we pass
      // to any getExternalSymbol() calls will remain valid for the lifetime of
      // the back-end instance. This is to work around an issue in SelectionDAG
      // where symbol names are expected to be life-long strings.
      Entry = FrameSymbols.insert(std::make_pair(FrameIndex, Name)).first;
    }
    // Always return a pointer into the stored string, never into a copy.
    return Entry->second.c_str();
  }
}; // class PTXMachineFunctionInfo

} // namespace llvm
#endif // PTX_MACHINE_FUNCTION_INFO_H

View File

@ -1,73 +0,0 @@
//===-- PTXParamManager.cpp - Manager for .param variables ----------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTXParamManager class.
//
//===----------------------------------------------------------------------===//
#include "PTXParamManager.h"
#include "PTX.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
// Default constructor: the body is empty, so every member is simply
// default-constructed.
PTXParamManager::PTXParamManager() {
}
/// addArgumentParam - Register a new argument .param of the given Size and
/// return its index.  The parameter is named "__param_<N>", where N is the
/// 1-based position among the argument parameters.
unsigned PTXParamManager::addArgumentParam(unsigned Size) {
  // The index is the number of parameters of any kind registered so far.
  const unsigned Index = AllParams.size();

  PTXParam Param;
  Param.Type = PTX_PARAM_TYPE_ARGUMENT;
  Param.Size = Size;
  Param.Name = "__param_" + utostr(ArgumentParams.size()+1);

  AllParams[Index] = Param;
  ArgumentParams.push_back(Index);

  return Index;
}
/// addReturnParam - Register a new return .param of the given Size and return
/// its index.  The parameter is named "__ret_<N>", where N is the 1-based
/// position among the return parameters.
unsigned PTXParamManager::addReturnParam(unsigned Size) {
  // The index is the number of parameters of any kind registered so far.
  const unsigned Index = AllParams.size();

  PTXParam Param;
  Param.Type = PTX_PARAM_TYPE_RETURN;
  Param.Size = Size;
  Param.Name = "__ret_" + utostr(ReturnParams.size()+1);

  AllParams[Index] = Param;
  ReturnParams.push_back(Index);

  return Index;
}
/// addLocalParam - Register a new local .param of the given Size and return
/// its index.  The parameter is named "__localparam_<N>", where N is the
/// 1-based position among the local parameters.
unsigned PTXParamManager::addLocalParam(unsigned Size) {
  // The index is the number of parameters of any kind registered so far.
  const unsigned Index = AllParams.size();

  PTXParam Param;
  Param.Type = PTX_PARAM_TYPE_LOCAL;
  Param.Size = Size;
  Param.Name = "__localparam_" + utostr(LocalParams.size()+1);

  AllParams[Index] = Param;
  LocalParams.push_back(Index);

  return Index;
}

View File

@ -1,87 +0,0 @@
//===-- PTXParamManager.h - Manager for .param variables --------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTXParamManager class, which manages all defined .param
// variables for a particular function.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_PARAM_MANAGER_H
#define PTX_PARAM_MANAGER_H
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include <string>
namespace llvm {
/// PTXParamManager - This class manages all .param variables defined for a
/// particular function.
class PTXParamManager {
private:
/// PTXParamType - Type of a .param variable
enum PTXParamType {
PTX_PARAM_TYPE_ARGUMENT,
PTX_PARAM_TYPE_RETURN,
PTX_PARAM_TYPE_LOCAL
};
/// PTXParam - Definition of a PTX .param variable
struct PTXParam {
PTXParamType Type;
unsigned Size;
std::string Name;
};
DenseMap<unsigned, PTXParam> AllParams;
SmallVector<unsigned, 4> ArgumentParams;
SmallVector<unsigned, 4> ReturnParams;
SmallVector<unsigned, 4> LocalParams;
public:
typedef SmallVector<unsigned, 4>::const_iterator param_iterator;
PTXParamManager();
param_iterator arg_begin() const { return ArgumentParams.begin(); }
param_iterator arg_end() const { return ArgumentParams.end(); }
param_iterator ret_begin() const { return ReturnParams.begin(); }
param_iterator ret_end() const { return ReturnParams.end(); }
param_iterator local_begin() const { return LocalParams.begin(); }
param_iterator local_end() const { return LocalParams.end(); }
/// addArgumentParam - Returns a new .param used as an argument.
unsigned addArgumentParam(unsigned Size);
/// addReturnParam - Returns a new .param used as a return argument.
unsigned addReturnParam(unsigned Size);
/// addLocalParam - Returns a new .param used as a local .param variable.
unsigned addLocalParam(unsigned Size);
/// getParamName - Returns the name of the parameter as a string.
const std::string &getParamName(unsigned Param) const {
assert(AllParams.count(Param) == 1 && "Param has not been defined!");
return AllParams.find(Param)->second.Name;
}
/// getParamSize - Returns the size of the parameter in bits.
unsigned getParamSize(unsigned Param) const {
assert(AllParams.count(Param) == 1 && "Param has not been defined!");
return AllParams.find(Param)->second.Size;
}
};
}
#endif

View File

@ -1,53 +0,0 @@
//===-- PTXRegAlloc.cpp - PTX Register Allocator --------------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains a register allocator for PTX code.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-reg-alloc"
#include "PTX.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/RegAllocRegistry.h"
using namespace llvm;
namespace {
// Special register allocator for PTX.
//
// This is a placeholder MachineFunctionPass: runOnMachineFunction() makes no
// changes and always reports the function as unmodified.
class PTXRegAlloc : public MachineFunctionPass {
public:
// Pass identification; its address is handed to the MachineFunctionPass base.
static char ID;
PTXRegAlloc() : MachineFunctionPass(ID) {}
// Human-readable pass name.
virtual const char* getPassName() const {
return "PTX Register Allocator";
}
// Declares that the CFG is preserved, then defers to the base class.
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
// Returns false: the machine function is never modified.
virtual bool runOnMachineFunction(MachineFunction &MF) {
// We do not actually do anything (at least not yet).
return false;
}
};
char PTXRegAlloc::ID = 0;
// Registers the allocator under the name "ptx" with
// createPTXRegisterAllocator as its factory function.
static RegisterRegAlloc
ptxRegAlloc("ptx", "PTX register allocator", createPTXRegisterAllocator);
}
// Factory for the PTX register allocator pass. Returns a newly allocated
// PTXRegAlloc instance.
FunctionPass *llvm::createPTXRegisterAllocator() {
return new PTXRegAlloc();
}

View File

@ -1,38 +0,0 @@
//===-- PTXRegisterInfo.cpp - PTX Register Information --------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#include "PTXRegisterInfo.h"
#include "PTX.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#define GET_REGINFO_TARGET_DESC
#include "PTXGenRegisterInfo.inc"
using namespace llvm;
// Constructs the PTX register info and stores a reference to the target's
// instruction info. The TM argument is not used by this constructor.
PTXRegisterInfo::PTXRegisterInfo(PTXTargetMachine &TM,
const TargetInstrInfo &tii)
// PTX does not have a return address register.
: PTXGenRegisterInfo(0), TII(tii) {
}
// Frame-index elimination hook. It is never expected to be called for PTX:
// reaching it is reported through llvm_unreachable, and all parameters are
// ignored.
void PTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator /*II*/,
int /*SPAdj*/,
RegScavenger * /*RS*/) const {
llvm_unreachable("FrameIndex should have been previously eliminated!");
}

View File

@ -1,56 +0,0 @@
//===-- PTXRegisterInfo.h - PTX Register Information Impl -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file contains the PTX implementation of the TargetRegisterInfo class.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_REGISTER_INFO_H
#define PTX_REGISTER_INFO_H
#include "llvm/Support/ErrorHandling.h"
#include "llvm/ADT/BitVector.h"
#define GET_REGINFO_HEADER
#include "PTXGenRegisterInfo.inc"
namespace llvm {
class PTXTargetMachine;
class MachineFunction;
// Register information for the PTX target, layered on the TableGen-generated
// PTXGenRegisterInfo. It reports no callee-saved and no reserved registers,
// and has no frame register.
struct PTXRegisterInfo : public PTXGenRegisterInfo {
private:
// Instruction info of the owning target, supplied at construction.
const TargetInstrInfo &TII;
public:
PTXRegisterInfo(PTXTargetMachine &TM,
const TargetInstrInfo &tii);
// Returns a list holding only the terminating 0 entry, i.e. no register is
// callee-saved. MF is ignored.
virtual const uint16_t
*getCalleeSavedRegs(const MachineFunction *MF = 0) const {
static const uint16_t CalleeSavedRegs[] = { 0 };
return CalleeSavedRegs; // save nothing
}
// Returns a bit vector sized to getNumRegs() with no bit set.
virtual BitVector getReservedRegs(const MachineFunction &MF) const {
BitVector Reserved(getNumRegs());
return Reserved; // reserve no regs
}
// Defined out of line; reaching it is treated as a fatal internal error.
virtual void eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj,
RegScavenger *RS = NULL) const;
// Must not be called: PTX has no frame register.
virtual unsigned getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PTX does not have a frame register");
}
}; // struct PTXRegisterInfo
} // namespace llvm
#endif // PTX_REGISTER_INFO_H

View File

@ -1,36 +0,0 @@
//===-- PTXRegisterInfo.td - PTX Register defs -------------*- tablegen -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//===----------------------------------------------------------------------===//
// Declarations that describe the PTX register file
//===----------------------------------------------------------------------===//
// Base class for every PTX register definition: a Register named 'n' placed
// in the "PTX" namespace.
class PTXReg<string n> : Register<n> {
let Namespace = "PTX";
}
//===----------------------------------------------------------------------===//
// Registers
//===----------------------------------------------------------------------===//
// The generated register info code throws warnings for empty register classes
// (e.g. zero-length arrays), so we use a dummy register here just to prevent
// these warnings. It is the sole member of every register class in this file.
def DUMMY_REG : PTXReg<"R0">;
//===----------------------------------------------------------------------===//
// Register classes
//===----------------------------------------------------------------------===//
// One register class per supported value type. The arguments are: namespace,
// value-type list, alignment, and member registers. Every class contains only
// DUMMY_REG.
def RegPred : RegisterClass<"PTX", [i1], 8, (add DUMMY_REG)>;
def RegI16 : RegisterClass<"PTX", [i16], 16, (add DUMMY_REG)>;
def RegI32 : RegisterClass<"PTX", [i32], 32, (add DUMMY_REG)>;
def RegI64 : RegisterClass<"PTX", [i64], 64, (add DUMMY_REG)>;
def RegF32 : RegisterClass<"PTX", [f32], 32, (add DUMMY_REG)>;
def RegF64 : RegisterClass<"PTX", [f64], 64, (add DUMMY_REG)>;

View File

@ -1,150 +0,0 @@
//===-- PTXSelectionDAGInfo.cpp - PTX SelectionDAG Info -------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTXSelectionDAGInfo class.
//
//===----------------------------------------------------------------------===//
#define DEBUG_TYPE "ptx-selectiondag-info"
#include "PTXTargetMachine.h"
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/SelectionDAG.h"
using namespace llvm;
// Constructs the SelectionDAG info and caches a pointer to the PTXSubtarget
// obtained from the target machine.
PTXSelectionDAGInfo::PTXSelectionDAGInfo(const TargetMachine &TM)
: TargetSelectionDAGInfo(TM),
Subtarget(&TM.getSubtarget<PTXSubtarget>()) {
}
// Out-of-line destructor; there is nothing to release.
PTXSelectionDAGInfo::~PTXSelectionDAGInfo() {
}
/// EmitTargetCodeForMemcpy - Expand a memcpy inline as explicit loads and
/// stores.
///
/// The expansion is attempted only when \p Align is a multiple of four and
/// \p Size is a constant; otherwise an empty SDValue is returned. The bulk of
/// the copy uses 32-bit accesses, emitted in groups of at most
/// MAX_LOADS_IN_LDM loads followed by the matching stores. The remaining
/// 1-3 bytes are copied with one 16-bit and/or one 8-bit access.
///
/// \p isVolatile is applied to every emitted access, including the trailing
/// sub-word ones. \p AlwaysInline is ignored: the copy is always inlined.
///
/// \returns the chain joining all emitted stores, or an empty SDValue when
/// the copy is not expanded here.
SDValue
PTXSelectionDAGInfo::EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                             SDValue Chain,
                                             SDValue Dst, SDValue Src,
                                             SDValue Size, unsigned Align,
                                             bool isVolatile, bool AlwaysInline,
                                             MachinePointerInfo DstPtrInfo,
                                             MachinePointerInfo SrcPtrInfo) const {
  // Do repeated 4-byte loads and stores. To be improved.
  // This requires 4-byte alignment.
  if ((Align & 3) != 0)
    return SDValue();

  // This requires the copy size to be a constant, preferably
  // within a subtarget-specific limit.
  ConstantSDNode *ConstantSize = dyn_cast<ConstantSDNode>(Size);
  if (!ConstantSize)
    return SDValue();
  uint64_t SizeVal = ConstantSize->getZExtValue();

  // Always inline memcpys, whatever AlwaysInline says. In PTX, we do not have
  // a C library that provides a memcpy function.

  unsigned BytesLeft = SizeVal & 3;
  unsigned NumMemOps = SizeVal >> 2;
  unsigned EmittedNumMemOps = 0;
  EVT VT = MVT::i32;
  unsigned VTSize = 4;
  unsigned i = 0;
  const unsigned MAX_LOADS_IN_LDM = 6;
  SDValue TFOps[MAX_LOADS_IN_LDM];
  SDValue Loads[MAX_LOADS_IN_LDM];
  uint64_t SrcOff = 0, DstOff = 0;
  EVT PointerType = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;

  // Emit up to MAX_LOADS_IN_LDM loads, then a TokenFactor barrier, then the
  // same number of stores. Grouping the loads ahead of the stores keeps the
  // loads of one batch independent of that batch's stores.
  while (EmittedNumMemOps < NumMemOps) {
    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      Loads[i] = DAG.getLoad(VT, dl, Chain,
                             DAG.getNode(ISD::ADD, dl, PointerType, Src,
                                         DAG.getConstant(SrcOff, PointerType)),
                             SrcPtrInfo.getWithOffset(SrcOff), isVolatile,
                             false, false, 0);
      TFOps[i] = Loads[i].getValue(1);
      SrcOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    for (i = 0;
         i < MAX_LOADS_IN_LDM && EmittedNumMemOps + i < NumMemOps; ++i) {
      TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                              DAG.getNode(ISD::ADD, dl, PointerType, Dst,
                                          DAG.getConstant(DstOff, PointerType)),
                              DstPtrInfo.getWithOffset(DstOff),
                              isVolatile, false, 0);
      DstOff += VTSize;
    }
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

    EmittedNumMemOps += i;
  }

  if (BytesLeft == 0)
    return Chain;

  // Issue loads / stores for the trailing (1 - 3) bytes. At most two accesses
  // are needed (one i16 and one i8), so the arrays above are large enough.
  unsigned BytesLeftSave = BytesLeft;
  i = 0;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    // Honor isVolatile here as well, so that a volatile memcpy never ends up
    // with non-volatile tail accesses.
    Loads[i] = DAG.getLoad(VT, dl, Chain,
                           DAG.getNode(ISD::ADD, dl, PointerType, Src,
                                       DAG.getConstant(SrcOff, PointerType)),
                           SrcPtrInfo.getWithOffset(SrcOff), isVolatile, false,
                           false, 0);
    TFOps[i] = Loads[i].getValue(1);
    ++i;
    SrcOff += VTSize;
    BytesLeft -= VTSize;
  }
  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);

  // Second pass over the same tail sizes, now emitting the stores.
  i = 0;
  BytesLeft = BytesLeftSave;
  while (BytesLeft) {
    if (BytesLeft >= 2) {
      VT = MVT::i16;
      VTSize = 2;
    } else {
      VT = MVT::i8;
      VTSize = 1;
    }

    TFOps[i] = DAG.getStore(Chain, dl, Loads[i],
                            DAG.getNode(ISD::ADD, dl, PointerType, Dst,
                                        DAG.getConstant(DstOff, PointerType)),
                            DstPtrInfo.getWithOffset(DstOff),
                            isVolatile, false, 0);
    ++i;
    DstOff += VTSize;
    BytesLeft -= VTSize;
  }
  return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &TFOps[0], i);
}
// Target hook for memset lowering. It is not implemented for PTX: every
// argument is ignored and calling it is reported through llvm_unreachable.
SDValue PTXSelectionDAGInfo::
EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain, SDValue Dst,
SDValue Src, SDValue Size,
unsigned Align, bool isVolatile,
MachinePointerInfo DstPtrInfo) const {
llvm_unreachable("memset lowering not implemented for PTX yet");
}

View File

@ -1,53 +0,0 @@
//===-- PTXSelectionDAGInfo.h - PTX SelectionDAG Info -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the PTX subclass for TargetSelectionDAGInfo.
//
//===----------------------------------------------------------------------===//
#ifndef PTXSELECTIONDAGINFO_H
#define PTXSELECTIONDAGINFO_H
#include "llvm/Target/TargetSelectionDAGInfo.h"
namespace llvm {

// Forward declaration so that this header does not depend on another header
// having declared PTXSubtarget first; only a pointer to it is stored below.
class PTXSubtarget;

/// PTXSelectionDAGInfo - TargetSelectionDAGInfo sub-class for the PTX target.
/// At the moment, this is mostly just a copy of ARMSelectionDAGInfo.
class PTXSelectionDAGInfo : public TargetSelectionDAGInfo {
  /// Subtarget - Keep a pointer to the PTXSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const PTXSubtarget *Subtarget;

public:
  explicit PTXSelectionDAGInfo(const TargetMachine &TM);
  ~PTXSelectionDAGInfo();

  /// Target hook for expanding a memcpy. Returns the resulting chain, or an
  /// empty SDValue when the copy is not expanded by the target.
  virtual
  SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Chain,
                                  SDValue Dst, SDValue Src,
                                  SDValue Size, unsigned Align,
                                  bool isVolatile, bool AlwaysInline,
                                  MachinePointerInfo DstPtrInfo,
                                  MachinePointerInfo SrcPtrInfo) const;

  /// Target hook for expanding a memset. Parameter names match the
  /// out-of-line definition.
  virtual
  SDValue EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
                                  SDValue Chain,
                                  SDValue Dst, SDValue Src,
                                  SDValue Size, unsigned Align,
                                  bool isVolatile,
                                  MachinePointerInfo DstPtrInfo) const;
};

}
#endif

View File

@ -1,68 +0,0 @@
//===-- PTXSubtarget.cpp - PTX Subtarget Information ----------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the PTX specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#include "PTXSubtarget.h"
#include "PTX.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/TargetRegistry.h"
#define GET_SUBTARGETINFO_TARGET_DESC
#define GET_SUBTARGETINFO_CTOR
#include "PTXGenSubtargetInfo.inc"
using namespace llvm;
// Intentionally empty out-of-line definition of the virtual anchor() method.
// NOTE(review): presumably present to pin the class's vtable to this file,
// per the usual LLVM convention -- confirm.
void PTXSubtarget::anchor() { }
/// Builds the subtarget with its defaults (compute_10 target, PTX 2.0, no
/// double support, FMA enabled), then parses the feature string for the
/// requested processor.
///
/// \param TT      target triple, forwarded to the generated base class.
/// \param CPU     processor name; "generic" is substituted when empty.
/// \param FS      feature string.
/// \param is64Bit whether addresses are 64 bits wide.
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &CPU,
                           const std::string &FS, bool is64Bit)
  : PTXGenSubtargetInfo(TT, CPU, FS),
    PTXTarget(PTX_COMPUTE_1_0),
    PTXVersion(PTX_VERSION_2_0),
    SupportsDouble(false),
    SupportsFMA(true),
    Is64Bit(is64Bit) {
  // Feature parsing needs a processor name, so fall back to "generic" when
  // the caller did not name one.
  const std::string ProcName = CPU.empty() ? std::string("generic") : CPU;
  ParseSubtargetFeatures(ProcName, FS);
}
/// Returns the textual name of the selected PTX target ("compute_NN" or
/// "sm_NN"). Any other enumerator value, including the PTX_LAST_* sentinels,
/// is reported through llvm_unreachable.
std::string PTXSubtarget::getTargetString() const {
  switch (PTXTarget) {
  // Compute-capability targets.
  case PTX_COMPUTE_1_0: return "compute_10";
  case PTX_COMPUTE_1_1: return "compute_11";
  case PTX_COMPUTE_1_2: return "compute_12";
  case PTX_COMPUTE_1_3: return "compute_13";
  case PTX_COMPUTE_2_0: return "compute_20";
  // Shader-model targets.
  case PTX_SM_1_0: return "sm_10";
  case PTX_SM_1_1: return "sm_11";
  case PTX_SM_1_2: return "sm_12";
  case PTX_SM_1_3: return "sm_13";
  case PTX_SM_2_0: return "sm_20";
  case PTX_SM_2_1: return "sm_21";
  case PTX_SM_2_2: return "sm_22";
  case PTX_SM_2_3: return "sm_23";
  default:
    break;
  }
  llvm_unreachable("Unknown PTX target");
}
/// Returns the selected PTX language version as "major.minor". A value that
/// matches none of the PTXVersionEnum enumerators is reported through
/// llvm_unreachable.
std::string PTXSubtarget::getPTXVersionString() const {
  const char *VersionText = 0;
  switch (PTXVersion) {
  case PTX_VERSION_2_0: VersionText = "2.0"; break;
  case PTX_VERSION_2_1: VersionText = "2.1"; break;
  case PTX_VERSION_2_2: VersionText = "2.2"; break;
  case PTX_VERSION_2_3: VersionText = "2.3"; break;
  }
  if (!VersionText)
    llvm_unreachable("Invalid PTX version");
  return VersionText;
}

View File

@ -1,131 +0,0 @@
//===-- PTXSubtarget.h - Define Subtarget for the PTX -----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the PTX specific subclass of TargetSubtargetInfo.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_SUBTARGET_H
#define PTX_SUBTARGET_H
#include "llvm/Target/TargetSubtargetInfo.h"
#define GET_SUBTARGETINFO_HEADER
#include "PTXGenSubtargetInfo.inc"
namespace llvm {
class StringRef;
/// PTXSubtarget - Subtarget description for the PTX back-end: the selected
/// target (compute capability or shader model), the PTX language version,
/// and a few feature flags, together with predicates derived from them.
class PTXSubtarget : public PTXGenSubtargetInfo {
virtual void anchor();
public:
/**
* Enumeration of Shader Models supported by the back-end.
*
* The compute_* and sm_* families each end with a PTX_LAST_* sentinel that
* the predicates below use as an exclusive upper bound.
*/
enum PTXTargetEnum {
PTX_COMPUTE_1_0, /**< Compute Compatibility 1.0 */
PTX_COMPUTE_1_1, /**< Compute Compatibility 1.1 */
PTX_COMPUTE_1_2, /**< Compute Compatibility 1.2 */
PTX_COMPUTE_1_3, /**< Compute Compatibility 1.3 */
PTX_COMPUTE_2_0, /**< Compute Compatibility 2.0 */
PTX_LAST_COMPUTE,
PTX_SM_1_0, /**< Shader Model 1.0 */
PTX_SM_1_1, /**< Shader Model 1.1 */
PTX_SM_1_2, /**< Shader Model 1.2 */
PTX_SM_1_3, /**< Shader Model 1.3 */
PTX_SM_2_0, /**< Shader Model 2.0 */
PTX_SM_2_1, /**< Shader Model 2.1 */
PTX_SM_2_2, /**< Shader Model 2.2 */
PTX_SM_2_3, /**< Shader Model 2.3 */
PTX_LAST_SM
};
/**
* Enumeration of PTX versions supported by the back-end.
*
* Currently, PTX 2.0 is the minimum supported version.
*/
enum PTXVersionEnum {
PTX_VERSION_2_0, /**< PTX Version 2.0 */
PTX_VERSION_2_1, /**< PTX Version 2.1 */
PTX_VERSION_2_2, /**< PTX Version 2.2 */
PTX_VERSION_2_3 /**< PTX Version 2.3 */
};
private:
/// Shader Model supported on the target GPU.
PTXTargetEnum PTXTarget;
/// PTX Language Version.
PTXVersionEnum PTXVersion;
// The native .f64 type is supported on the hardware.
bool SupportsDouble;
// Support the fused-multiply add (FMA) and multiply-add (MAD)
// instructions
bool SupportsFMA;
// Use .u64 instead of .u32 for addresses.
bool Is64Bit;
public:
PTXSubtarget(const std::string &TT, const std::string &CPU,
const std::string &FS, bool is64Bit);
// Target architecture accessors
std::string getTargetString() const;
std::string getPTXVersionString() const;
bool supportsDouble() const { return SupportsDouble; }
bool is64Bit() const { return Is64Bit; }
bool supportsFMA() const { return SupportsFMA; }
// PTX language version checks: true when the selected version is at least
// the one named.
bool supportsPTX21() const { return PTXVersion >= PTX_VERSION_2_1; }
bool supportsPTX22() const { return PTXVersion >= PTX_VERSION_2_2; }
bool supportsPTX23() const { return PTXVersion >= PTX_VERSION_2_3; }
// True for sm_13 and later shader models, or compute_13 and later compute
// capabilities.
bool fdivNeedsRoundingMode() const {
return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
(PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
}
// Same condition as fdivNeedsRoundingMode().
bool fmadNeedsRoundingMode() const {
return (PTXTarget >= PTX_SM_1_3 && PTXTarget < PTX_LAST_SM) ||
(PTXTarget >= PTX_COMPUTE_1_3 && PTXTarget < PTX_LAST_COMPUTE);
}
// True for sm_20 and later shader models, or compute_20.
bool useParamSpaceForDeviceArgs() const {
return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
}
// Same condition as useParamSpaceForDeviceArgs().
bool callsAreHandled() const {
return (PTXTarget >= PTX_SM_2_0 && PTXTarget < PTX_LAST_SM) ||
(PTXTarget >= PTX_COMPUTE_2_0 && PTXTarget < PTX_LAST_COMPUTE);
}
// True when the PTX version is 2.2 or newer.
bool emitPtrAttribute() const {
return PTXVersion >= PTX_VERSION_2_2;
}
// Declared here and called from the constructor; the definition is not in
// this header.
void ParseSubtargetFeatures(StringRef CPU, StringRef FS);
}; // class PTXSubtarget
} // namespace llvm
#endif // PTX_SUBTARGET_H

View File

@ -1,165 +0,0 @@
//===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Top-level implementation for the PTX target.
//
//===----------------------------------------------------------------------===//
#include "PTXTargetMachine.h"
#include "PTX.h"
#include "llvm/PassManager.h"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/Verifier.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/MachineFunctionAnalysis.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSubtargetInfo.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/TargetRegistry.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include "llvm/Transforms/Scalar.h"
using namespace llvm;
namespace llvm {
// Declaration of the PTX assembly streamer factory. It is only declared in
// this file and is registered for both PTX targets in
// LLVMInitializePTXTarget().
MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS,
bool isVerboseAsm, bool useLoc,
bool useCFI, bool useDwarfDirectory,
MCInstPrinter *InstPrint,
MCCodeEmitter *CE,
MCAsmBackend *MAB,
bool ShowInst);
}
// C-linkage entry point that registers the 32-bit and 64-bit PTX target
// machines and installs createPTXAsmStreamer as the assembly streamer
// factory for both targets.
extern "C" void LLVMInitializePTXTarget() {
RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target);
RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target);
TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer);
TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer);
}
namespace {
// Data layout strings handed to TargetData by the PTXTargetMachine
// constructor. The two strings differ only in the pointer specification:
// p:32:32 for the 32-bit target and p:64:64 for the 64-bit target.
const char* DataLayout32 =
"e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
const char* DataLayout64 =
"e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64";
}
// DataLayout and FrameLowering are filled with dummy data
//
// Common constructor for both PTX target machines. is64Bit selects the data
// layout string and is forwarded to the subtarget. Subtarget is initialized
// before FrameLowering because FrameLowering is constructed from it.
PTXTargetMachine::PTXTargetMachine(const Target &T,
StringRef TT, StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit)
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
DataLayout(is64Bit ? DataLayout64 : DataLayout32),
Subtarget(TT, CPU, FS, is64Bit),
FrameLowering(Subtarget),
InstrInfo(*this),
TSInfo(*this),
TLInfo(*this) {
}
// Intentionally empty out-of-line definition of the virtual anchor() method.
void PTX32TargetMachine::anchor() { }
// 32-bit PTX target machine: forwards every argument to PTXTargetMachine
// with is64Bit set to false.
PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
}
// Intentionally empty out-of-line definition of the virtual anchor() method.
void PTX64TargetMachine::anchor() { }
// 64-bit PTX target machine: forwards every argument to PTXTargetMachine
// with is64Bit set to true.
PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS,
const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL)
: PTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
}
namespace llvm {
/// PTX Code Generator Pass Configuration Options.
///
/// Declares the TargetPassConfig hooks that the PTX target overrides; the
/// definitions follow later in this file.
class PTXPassConfig : public TargetPassConfig {
public:
PTXPassConfig(PTXTargetMachine *TM, PassManagerBase &PM)
: TargetPassConfig(TM, PM) {}
// Returns the target machine as a PTXTargetMachine.
PTXTargetMachine &getPTXTargetMachine() const {
return getTM<PTXTargetMachine>();
}
bool addInstSelector();
FunctionPass *createTargetRegisterAllocator(bool);
void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
bool addPostRegAlloc();
void addMachineLateOptimization();
bool addPreEmitPass();
};
} // namespace llvm
// Creates the PTX pass configuration for PM. The prolog/epilog inserter pass
// is disabled on the new configuration before it is returned.
TargetPassConfig *PTXTargetMachine::createPassConfig(PassManagerBase &PM) {
PTXPassConfig *PassConfig = new PTXPassConfig(this, PM);
PassConfig->disablePass(PrologEpilogCodeInserterID);
return PassConfig;
}
// Adds the PTX instruction selector pass, built for the current target
// machine and optimization level. Always returns false.
bool PTXPassConfig::addInstSelector() {
PM->add(createPTXISelDag(getPTXTargetMachine(), getOptLevel()));
return false;
}
// Always returns the PTX-specific register allocator; the Optimized flag is
// ignored.
FunctionPass *PTXPassConfig::createTargetRegisterAllocator(bool /*Optimized*/) {
return createPTXRegisterAllocator();
}
// Modify the optimized compilation path to bypass optimized register
// allocation: the allocator pass is always added through the fast
// register-allocation path.
void PTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
addFastRegAlloc(RegAllocPass);
}
// Post-register-allocation hook. No pass is added here and false is always
// returned.
bool PTXPassConfig::addPostRegAlloc() {
// PTXMFInfoExtract must run after register allocation! The call below is
// disabled; the pass is added from addPreEmitPass() instead.
//PM->add(createPTXMFInfoExtract(getPTXTargetMachine()));
return false;
}
/// Add passes that optimize machine instructions after register allocation.
///
/// Branch folding and tail duplication are scheduled in that order. Each is
/// followed by a print/verify step only when the pass was actually added.
void PTXPassConfig::addMachineLateOptimization() {
  const bool AddedBranchFolder = addPass(BranchFolderPassID) != &NoPassID;
  if (AddedBranchFolder)
    printAndVerify("After BranchFolding");

  const bool AddedTailDuplicate = addPass(TailDuplicateID) != &NoPassID;
  if (AddedTailDuplicate)
    printAndVerify("After TailDuplicate");
}
// Adds the passes that run just before code emission: PTXMFInfoExtract
// first, then the floating-point rounding-mode pass. Always returns true.
bool PTXPassConfig::addPreEmitPass() {
PM->add(createPTXMFInfoExtract(getPTXTargetMachine(), getOptLevel()));
PM->add(createPTXFPRoundingModePass(getPTXTargetMachine(), getOptLevel()));
return true;
}

View File

@ -1,104 +0,0 @@
//===-- PTXTargetMachine.h - Define TargetMachine for PTX -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file declares the PTX specific subclass of TargetMachine.
//
//===----------------------------------------------------------------------===//
#ifndef PTX_TARGET_MACHINE_H
#define PTX_TARGET_MACHINE_H
#include "PTXISelLowering.h"
#include "PTXInstrInfo.h"
#include "PTXFrameLowering.h"
#include "PTXSelectionDAGInfo.h"
#include "PTXSubtarget.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetFrameLowering.h"
#include "llvm/Target/TargetMachine.h"
namespace llvm {
/// PTXTargetMachine - Common base of the 32-bit and 64-bit PTX target
/// machines. It owns the per-target objects (data layout, subtarget, frame
/// lowering, instruction info, SelectionDAG info, lowering info) and exposes
/// them through the TargetMachine accessors.
class PTXTargetMachine : public LLVMTargetMachine {
private:
// Members are initialized in declaration order by the constructor.
const TargetData DataLayout;
PTXSubtarget Subtarget; // has to be initialized before FrameLowering
PTXFrameLowering FrameLowering;
PTXInstrInfo InstrInfo;
PTXSelectionDAGInfo TSInfo;
PTXTargetLowering TLInfo;
public:
PTXTargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL,
bool is64Bit);
// Accessors returning pointers to the members above.
virtual const TargetData *getTargetData() const { return &DataLayout; }
virtual const TargetFrameLowering *getFrameLowering() const {
return &FrameLowering;
}
virtual const PTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
// The register info is owned by the instruction info object.
virtual const TargetRegisterInfo *getRegisterInfo() const {
return &InstrInfo.getRegisterInfo(); }
virtual const PTXTargetLowering *getTargetLowering() const {
return &TLInfo; }
virtual const PTXSelectionDAGInfo* getSelectionDAGInfo() const {
return &TSInfo;
}
virtual const PTXSubtarget *getSubtargetImpl() const { return &Subtarget; }
// Emission of machine code through JITCodeEmitter is not supported.
// Adds no passes and returns true unconditionally.
virtual bool addPassesToEmitMachineCode(PassManagerBase &,
JITCodeEmitter &,
bool = true) {
return true;
}
// Emission of machine code through MCJIT is not supported.
// Adds no passes and returns true unconditionally.
virtual bool addPassesToEmitMC(PassManagerBase &,
MCContext *&,
raw_ostream &,
bool = true) {
return true;
}
// Pass Pipeline Configuration
virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);
}; // class PTXTargetMachine
/// PTX32TargetMachine - PTX target machine for the 32-bit target.
class PTX32TargetMachine : public PTXTargetMachine {
virtual void anchor();
public:
PTX32TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX32TargetMachine
/// PTX64TargetMachine - PTX target machine for the 64-bit target.
class PTX64TargetMachine : public PTXTargetMachine {
virtual void anchor();
public:
PTX64TargetMachine(const Target &T, StringRef TT,
StringRef CPU, StringRef FS, const TargetOptions &Options,
Reloc::Model RM, CodeModel::Model CM,
CodeGenOpt::Level OL);
}; // class PTX64TargetMachine
} // namespace llvm
#endif // PTX_TARGET_MACHINE_H

View File

@ -1,7 +0,0 @@
# Add the parent directory, in both the build tree and the source tree, to
# the include path of this subdirectory.
include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
# Build the LLVMPTXInfo library from its single source file.
add_llvm_library(LLVMPTXInfo
PTXTargetInfo.cpp
)
# Build LLVMPTXInfo only after the PTXCommonTableGen target.
# NOTE(review): presumably that target produces the generated headers this
# library includes -- confirm.
add_dependencies(LLVMPTXInfo PTXCommonTableGen)

View File

@ -1,23 +0,0 @@
;===- ./lib/Target/PTX/TargetInfo/LLVMBuild.txt ----------------*- Conf -*--===;
;
; The LLVM Compiler Infrastructure
;
; This file is distributed under the University of Illinois Open Source
; License. See LICENSE.TXT for details.
;
;===------------------------------------------------------------------------===;
;
; This is an LLVMBuild description file for the components in this subdirectory.
;
; For more information on the LLVMBuild system, please see:
;
; http://llvm.org/docs/LLVMBuild.html
;
;===------------------------------------------------------------------------===;
; The PTXInfo library component: a child of the PTX component, added to the
; PTX library group, and requiring the MC, Support and Target libraries.
[component_0]
type = Library
name = PTXInfo
parent = PTX
required_libraries = MC Support Target
add_to_library_groups = PTX

View File

@ -1,15 +0,0 @@
##===- lib/Target/PTX/TargetInfo/Makefile ------------------*- Makefile -*-===##
#
# The LLVM Compiler Infrastructure
#
# This file is distributed under the University of Illinois Open Source
# License. See LICENSE.TXT for details.
#
##===----------------------------------------------------------------------===##
# Relative path from this directory to the directory holding Makefile.common.
LEVEL = ../../../..
# Name of the library built from this directory.
LIBRARYNAME = LLVMPTXInfo
# Hack: we need to include 'main' target directory to grab private headers
CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
# Pull in the shared build rules.
include $(LEVEL)/Makefile.common

View File

@ -1,25 +0,0 @@
//===-- PTXTargetInfo.cpp - PTX Target Implementation ---------------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
#include "PTX.h"
#include "llvm/Module.h"
#include "llvm/Support/TargetRegistry.h"
using namespace llvm;
// Target objects for the 32-bit and 64-bit PTX targets. They are filled in
// by LLVMInitializePTXTargetInfo().
Target llvm::ThePTX32Target;
Target llvm::ThePTX64Target;
// C-linkage entry point that registers the two PTX targets under the names
// "ptx32" and "ptx64", bound to the Triple::ptx32 and Triple::ptx64
// architectures respectively.
extern "C" void LLVMInitializePTXTargetInfo() {
// see llvm/ADT/Triple.h
RegisterTarget<Triple::ptx32> X32(ThePTX32Target, "ptx32",
"PTX (32-bit) [Experimental]");
RegisterTarget<Triple::ptx64> X64(ThePTX64Target, "ptx64",
"PTX (64-bit) [Experimental]");
}

View File

@ -309,7 +309,6 @@ AC_CACHE_CHECK([target architecture],[llvm_cv_target_arch],
msp430-*) llvm_cv_target_arch="MSP430" ;;
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac])
@ -457,7 +456,6 @@ else
MSP430) AC_SUBST(TARGET_HAS_JIT,0) ;;
Hexagon) AC_SUBST(TARGET_HAS_JIT,0) ;;
MBlaze) AC_SUBST(TARGET_HAS_JIT,0) ;;
PTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
NVPTX) AC_SUBST(TARGET_HAS_JIT,0) ;;
*) AC_SUBST(TARGET_HAS_JIT,0) ;;
esac
@ -569,13 +567,13 @@ TARGETS_TO_BUILD=""
AC_ARG_ENABLE([targets],AS_HELP_STRING([--enable-targets],
[Build specific host targets: all or target1,target2,... Valid targets are:
host, x86, x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, nvptx, cbe, and cpp (default=all)]),,
xcore, msp430, nvptx, cbe, and cpp (default=all)]),,
enableval=all)
if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -590,7 +588,6 @@ case "$enableval" in
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -604,7 +601,6 @@ case "$enableval" in
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) AC_MSG_ERROR([Can not set target to build]) ;;
esac ;;

View File

@ -1402,8 +1402,7 @@ Optional Features:
--enable-targets Build specific host targets: all or
target1,target2,... Valid targets are: host, x86,
x86_64, sparc, powerpc, arm, mips, spu, hexagon,
xcore, msp430, ptx, nvptx, cbe, and cpp
(default=all)
xcore, msp430, nvptx, cbe, and cpp (default=all)
--enable-bindings Build specific language bindings:
all,auto,none,{binding-name} (default=auto)
--enable-libffi Check for the presence of libffi (default is NO)
@ -3846,7 +3845,6 @@ else
msp430-*) llvm_cv_target_arch="MSP430" ;;
hexagon-*) llvm_cv_target_arch="Hexagon" ;;
mblaze-*) llvm_cv_target_arch="MBlaze" ;;
ptx-*) llvm_cv_target_arch="PTX" ;;
nvptx-*) llvm_cv_target_arch="NVPTX" ;;
*) llvm_cv_target_arch="Unknown" ;;
esac
@ -5069,8 +5067,6 @@ else
Hexagon) TARGET_HAS_JIT=0
;;
MBlaze) TARGET_HAS_JIT=0
;;
PTX) TARGET_HAS_JIT=0
;;
NVPTX) TARGET_HAS_JIT=0
;;
@ -5258,7 +5254,7 @@ if test "$enableval" = host-only ; then
enableval=host
fi
case "$enableval" in
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze PTX NVPTX" ;;
all) TARGETS_TO_BUILD="X86 Sparc PowerPC ARM Mips CellSPU XCore MSP430 Hexagon CppBackend MBlaze NVPTX" ;;
*)for a_target in `echo $enableval|sed -e 's/,/ /g' ` ; do
case "$a_target" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5273,7 +5269,6 @@ case "$enableval" in
hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
cpp) TARGETS_TO_BUILD="CppBackend $TARGETS_TO_BUILD" ;;
mblaze) TARGETS_TO_BUILD="MBlaze $TARGETS_TO_BUILD" ;;
ptx) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
nvptx) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
host) case "$llvm_cv_target_arch" in
x86) TARGETS_TO_BUILD="X86 $TARGETS_TO_BUILD" ;;
@ -5287,7 +5282,6 @@ case "$enableval" in
XCore) TARGETS_TO_BUILD="XCore $TARGETS_TO_BUILD" ;;
MSP430) TARGETS_TO_BUILD="MSP430 $TARGETS_TO_BUILD" ;;
Hexagon) TARGETS_TO_BUILD="Hexagon $TARGETS_TO_BUILD" ;;
PTX) TARGETS_TO_BUILD="PTX $TARGETS_TO_BUILD" ;;
NVPTX) TARGETS_TO_BUILD="NVPTX $TARGETS_TO_BUILD" ;;
*) { { echo "$as_me:$LINENO: error: Can not set target to build" >&5
echo "$as_me: error: Can not set target to build" >&2;}
@ -10313,7 +10307,7 @@ else
lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2
lt_status=$lt_dlunknown
cat > conftest.$ac_ext <<EOF
#line 10316 "configure"
#line 10310 "configure"
#include "confdefs.h"
#if HAVE_DLFCN_H

View File

@ -1,24 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
@A = common global [1536 x [1536 x float]] zeroinitializer, align 4
@B = common global [1536 x [1536 x float]] zeroinitializer, align 4
define internal ptx_device void @init_array(i32 %x, i32 %y) {
%arrayidx103 = getelementptr [1536 x [1536 x float]]* @A, i32 0, i32 %x, i32 %y
%arrayidx224 = getelementptr [1536 x [1536 x float]]* @B, i32 0, i32 %x, i32 %y
%mul5 = mul i32 %x, %y
%rem = srem i32 %mul5, 1024
%add = add nsw i32 %rem, 1
; CHECK: cvt.rn.f64.s32 %fd{{[0-9]+}}, %r{{[0-9]+}}
%conv = sitofp i32 %add to double
%div = fmul double %conv, 5.000000e-01
%conv7 = fptrunc double %div to float
store float %conv7, float* %arrayidx103, align 4
%rem14 = srem i32 %mul5, 1024
%add15 = add nsw i32 %rem14, 1
%conv16 = sitofp i32 %add15 to double
%div17 = fmul double %conv16, 5.000000e-01
%conv18 = fptrunc double %div17 to float
store float %conv18, float* %arrayidx224, align 4
ret void
}

View File

@ -1,71 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
; CHECK: ret;
%z = add i16 %x, %y
ret i16 %z
}
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%z = add i32 %x, %y
ret i32 %z
}
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK: ret;
%z = add i64 %x, %y
ret i64 %z
}
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret;
%z = fadd float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
; CHECK: ret;
%z = fadd double %x, %y
ret double %z
}
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, 1;
; CHECK: ret;
%z = add i16 %x, 1
ret i16 %z
}
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 1;
; CHECK: ret;
%z = add i32 %x, 1
ret i32 %z
}
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, 1;
; CHECK: ret;
%z = add i64 %x, 1
ret i64 %z
}
define ptx_device float @t2_f32(float %x) {
; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D3FF0000000000000;
; CHECK: ret;
%z = fadd float %x, 1.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D3FF0000000000000;
; CHECK: ret;
%z = fadd double %x, 1.0
ret double %z
}

View File

@ -1,24 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
; XFAIL: *
%complex = type { float, float }
define ptx_device %complex @complex_add(%complex %a, %complex %b) {
entry:
; CHECK: ld.param.f32 r[[R0:[0-9]+]], [__param_1];
; CHECK-NEXT: ld.param.f32 r[[R2:[0-9]+]], [__param_3];
; CHECK-NEXT: ld.param.f32 r[[R1:[0-9]+]], [__param_2];
; CHECK-NEXT: ld.param.f32 r[[R3:[0-9]+]], [__param_4];
; CHECK-NEXT: add.rn.f32 r[[R0]], r[[R0]], r[[R2]];
; CHECK-NEXT: add.rn.f32 r[[R1]], r[[R1]], r[[R3]];
; CHECK-NEXT: ret;
%a.real = extractvalue %complex %a, 0
%a.imag = extractvalue %complex %a, 1
%b.real = extractvalue %complex %b, 0
%b.imag = extractvalue %complex %b, 1
%ret.real = fadd float %a.real, %b.real
%ret.imag = fadd float %a.imag, %b.imag
%ret.0 = insertvalue %complex undef, float %ret.real, 0
%ret.1 = insertvalue %complex %ret.0, float %ret.imag, 1
ret %complex %ret.1
}

View File

@ -1,24 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; preds
define ptx_device i32 @t1_and_preds(i1 %x, i1 %y) {
; CHECK: and.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
%c = and i1 %x, %y
%d = zext i1 %c to i32
ret i32 %d
}
define ptx_device i32 @t1_or_preds(i1 %x, i1 %y) {
; CHECK: or.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
%a = or i1 %x, %y
%b = zext i1 %a to i32
ret i32 %b
}
define ptx_device i32 @t1_xor_preds(i1 %x, i1 %y) {
; CHECK: xor.pred %p{{[0-9]+}}, %p{{[0-9]+}}, %p{{[0-9]+}}
%a = xor i1 %x, %y
%b = zext i1 %a to i32
ret i32 %b
}

View File

@ -1,24 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device void @test_bra_direct() {
; CHECK: bra $L__BB0_1;
entry:
br label %loop
loop:
br label %loop
}
define ptx_device i32 @test_bra_cond_direct(i32 %x, i32 %y) {
entry:
; CHECK: setp.le.u32 %p0, %r[[R0:[0-9]+]], %r[[R1:[0-9]+]]
%p = icmp ugt i32 %x, %y
; CHECK-NEXT: @%p0 bra
; CHECK-NOT: bra
br i1 %p, label %clause.if, label %clause.else
clause.if:
; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R0]]
ret i32 %x
clause.else:
; CHECK: mov.u32 %ret{{[0-9]+}}, %r[[R1]]
ret i32 %y
}

View File

@ -1,290 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; preds
; (note: we convert back to i32 to return)
define ptx_device i32 @cvt_pred_i16(i16 %x, i1 %y) {
; CHECK: setp.gt.u16 %p[[P0:[0-9]+]], %rh{{[0-9]+}}, 0
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
; CHECK: ret;
%a = trunc i16 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
ret i32 %c
}
define ptx_device i32 @cvt_pred_i32(i32 %x, i1 %y) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
; CHECK: ret;
%a = trunc i32 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
ret i32 %c
}
define ptx_device i32 @cvt_pred_i64(i64 %x, i1 %y) {
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
; CHECK: ret;
%a = trunc i64 %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
ret i32 %c
}
define ptx_device i32 @cvt_pred_f32(float %x, i1 %y) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
; CHECK: ret;
%a = fptoui float %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
ret i32 %c
}
define ptx_device i32 @cvt_pred_f64(double %x, i1 %y) {
; CHECK: setp.gt.u64 %p[[P0:[0-9]+]], %rd{{[0-9]+}}, 0
; CHECK: and.pred %p2, %p[[P0:[0-9]+]], %p{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0:[0-9]+]];
; CHECK: ret;
%a = fptoui double %x to i1
%b = and i1 %a, %y
%c = zext i1 %b to i32
ret i32 %c
}
; i16
define ptx_device i16 @cvt_i16_preds(i1 %x) {
; CHECK: selp.u16 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
; CHECK: ret;
%a = zext i1 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i32(i32 %x) {
; CHECK: cvt.u16.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%a = trunc i32 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_i64(i64 %x) {
; CHECK: cvt.u16.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK: ret;
%a = trunc i64 %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_f32(float %x) {
; CHECK: cvt.rzi.u16.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fptoui float %x to i16
ret i16 %a
}
define ptx_device i16 @cvt_i16_f64(double %x) {
; CHECK: cvt.rzi.u16.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i16
ret i16 %a
}
; i32
define ptx_device i32 @cvt_i32_preds(i1 %x) {
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
; CHECK: ret;
%a = zext i1 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i16(i16 %x) {
; CHECK: cvt.u32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
; CHECK: ret;
%a = zext i16 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_i64(i64 %x) {
; CHECK: cvt.u32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK: ret;
%a = trunc i64 %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_f32(float %x) {
; CHECK: cvt.rzi.u32.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fptoui float %x to i32
ret i32 %a
}
define ptx_device i32 @cvt_i32_f64(double %x) {
; CHECK: cvt.rzi.u32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i32
ret i32 %a
}
; i64
define ptx_device i64 @cvt_i64_preds(i1 %x) {
; CHECK: selp.u64 %ret{{[0-9]+}}, 1, 0, %p{{[0-9]+}};
; CHECK: ret;
%a = zext i1 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i16(i16 %x) {
; CHECK: cvt.u64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
; CHECK: ret;
%a = zext i16 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_i32(i32 %x) {
; CHECK: cvt.u64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%a = zext i32 %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_f32(float %x) {
; CHECK: cvt.rzi.u64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fptoui float %x to i64
ret i64 %a
}
define ptx_device i64 @cvt_i64_f64(double %x) {
; CHECK: cvt.rzi.u64.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fptoui double %x to i64
ret i64 %a
}
; f32
define ptx_device float @cvt_f32_preds(i1 %x) {
; CHECK: mov.b32 %f0, 0;
; CHECK: mov.b32 %f1, 1065353216;
; CHECK: selp.f32 %ret{{[0-9]+}}, %f1, %f0, %p{{[0-9]+}};
; CHECK: ret;
%a = uitofp i1 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i16(i16 %x) {
; CHECK: cvt.rn.f32.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
; CHECK: ret;
%a = uitofp i16 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i32(i32 %x) {
; CHECK: cvt.rn.f32.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%a = uitofp i32 %x to float
ret float %a
}
define ptx_device float @cvt_f32_i64(i64 %x) {
; CHECK: cvt.rn.f32.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK: ret;
%a = uitofp i64 %x to float
ret float %a
}
define ptx_device float @cvt_f32_f64(double %x) {
; CHECK: cvt.rn.f32.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fptrunc double %x to float
ret float %a
}
define ptx_device float @cvt_f32_s16(i16 %x) {
; CHECK: cvt.rn.f32.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
; CHECK: ret
%a = sitofp i16 %x to float
ret float %a
}
define ptx_device float @cvt_f32_s32(i32 %x) {
; CHECK: cvt.rn.f32.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%a = sitofp i32 %x to float
ret float %a
}
define ptx_device float @cvt_f32_s64(i64 %x) {
; CHECK: cvt.rn.f32.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
; CHECK: ret
%a = sitofp i64 %x to float
ret float %a
}
; f64
define ptx_device double @cvt_f64_preds(i1 %x) {
; CHECK: mov.b64 %fd0, 0;
; CHECK: mov.b64 %fd1, 4575657221408423936;
; CHECK: selp.f64 %ret{{[0-9]+}}, %fd1, %fd0, %p{{[0-9]+}};
; CHECK: ret;
%a = uitofp i1 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i16(i16 %x) {
; CHECK: cvt.rn.f64.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}};
; CHECK: ret;
%a = uitofp i16 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i32(i32 %x) {
; CHECK: cvt.rn.f64.u32 %ret{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%a = uitofp i32 %x to double
ret double %a
}
define ptx_device double @cvt_f64_i64(i64 %x) {
; CHECK: cvt.rn.f64.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK: ret;
%a = uitofp i64 %x to double
ret double %a
}
define ptx_device double @cvt_f64_f32(float %x) {
; CHECK: cvt.f64.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fpext float %x to double
ret double %a
}
define ptx_device double @cvt_f64_s16(i16 %x) {
; CHECK: cvt.rn.f64.s16 %ret{{[0-9]+}}, %rh{{[0-9]+}}
; CHECK: ret
%a = sitofp i16 %x to double
ret double %a
}
define ptx_device double @cvt_f64_s32(i32 %x) {
; CHECK: cvt.rn.f64.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret
%a = sitofp i32 %x to double
ret double %a
}
define ptx_device double @cvt_f64_s64(i64 %x) {
; CHECK: cvt.rn.f64.s64 %ret{{[0-9]+}}, %rd{{[0-9]+}}
; CHECK: ret
%a = sitofp i64 %x to double
ret double %a
}

View File

@ -1,14 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_kernel void @t1() {
; CHECK: exit;
; CHECK-NOT: ret;
ret void
}
define ptx_kernel void @t2(i32* %p, i32 %x) {
store i32 %x, i32* %p
; CHECK: exit;
; CHECK-NOT: ret;
ret void
}

View File

@ -1,15 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm10 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: div.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fdiv float %x, %y
ret float %a
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: div.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fdiv double %x, %y
ret double %a
}

View File

@ -1,15 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: div.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fdiv float %x, %y
ret float %a
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: div.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fdiv double %x, %y
ret double %a
}

View File

@ -1,15 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
define ptx_device float @t1_f32(float %x) {
; CHECK: neg.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%y = fsub float -0.000000e+00, %x
ret float %y
}
define ptx_device double @t1_f64(double %x) {
; CHECK: neg.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%y = fsub double -0.000000e+00, %x
ret double %y
}

View File

@ -1,281 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
define ptx_device i32 @test_tid_x() {
; CHECK: mov.u32 %ret0, %tid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.x()
ret i32 %x
}
define ptx_device i32 @test_tid_y() {
; CHECK: mov.u32 %ret0, %tid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.y()
ret i32 %x
}
define ptx_device i32 @test_tid_z() {
; CHECK: mov.u32 %ret0, %tid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.z()
ret i32 %x
}
define ptx_device i32 @test_tid_w() {
; CHECK: mov.u32 %ret0, %tid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.tid.w()
ret i32 %x
}
define ptx_device i32 @test_ntid_x() {
; CHECK: mov.u32 %ret0, %ntid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.x()
ret i32 %x
}
define ptx_device i32 @test_ntid_y() {
; CHECK: mov.u32 %ret0, %ntid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.y()
ret i32 %x
}
define ptx_device i32 @test_ntid_z() {
; CHECK: mov.u32 %ret0, %ntid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.z()
ret i32 %x
}
define ptx_device i32 @test_ntid_w() {
; CHECK: mov.u32 %ret0, %ntid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ntid.w()
ret i32 %x
}
define ptx_device i32 @test_laneid() {
; CHECK: mov.u32 %ret0, %laneid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.laneid()
ret i32 %x
}
define ptx_device i32 @test_warpid() {
; CHECK: mov.u32 %ret0, %warpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.warpid()
ret i32 %x
}
define ptx_device i32 @test_nwarpid() {
; CHECK: mov.u32 %ret0, %nwarpid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nwarpid()
ret i32 %x
}
define ptx_device i32 @test_ctaid_x() {
; CHECK: mov.u32 %ret0, %ctaid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.x()
ret i32 %x
}
define ptx_device i32 @test_ctaid_y() {
; CHECK: mov.u32 %ret0, %ctaid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.y()
ret i32 %x
}
define ptx_device i32 @test_ctaid_z() {
; CHECK: mov.u32 %ret0, %ctaid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.z()
ret i32 %x
}
define ptx_device i32 @test_ctaid_w() {
; CHECK: mov.u32 %ret0, %ctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.ctaid.w()
ret i32 %x
}
define ptx_device i32 @test_nctaid_x() {
; CHECK: mov.u32 %ret0, %nctaid.x;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.x()
ret i32 %x
}
define ptx_device i32 @test_nctaid_y() {
; CHECK: mov.u32 %ret0, %nctaid.y;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.y()
ret i32 %x
}
define ptx_device i32 @test_nctaid_z() {
; CHECK: mov.u32 %ret0, %nctaid.z;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.z()
ret i32 %x
}
define ptx_device i32 @test_nctaid_w() {
; CHECK: mov.u32 %ret0, %nctaid.w;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nctaid.w()
ret i32 %x
}
define ptx_device i32 @test_smid() {
; CHECK: mov.u32 %ret0, %smid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.smid()
ret i32 %x
}
define ptx_device i32 @test_nsmid() {
; CHECK: mov.u32 %ret0, %nsmid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.nsmid()
ret i32 %x
}
define ptx_device i32 @test_gridid() {
; CHECK: mov.u32 %ret0, %gridid;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.gridid()
ret i32 %x
}
define ptx_device i32 @test_lanemask_eq() {
; CHECK: mov.u32 %ret0, %lanemask_eq;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.eq()
ret i32 %x
}
define ptx_device i32 @test_lanemask_le() {
; CHECK: mov.u32 %ret0, %lanemask_le;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.le()
ret i32 %x
}
define ptx_device i32 @test_lanemask_lt() {
; CHECK: mov.u32 %ret0, %lanemask_lt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.lt()
ret i32 %x
}
define ptx_device i32 @test_lanemask_ge() {
; CHECK: mov.u32 %ret0, %lanemask_ge;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.ge()
ret i32 %x
}
define ptx_device i32 @test_lanemask_gt() {
; CHECK: mov.u32 %ret0, %lanemask_gt;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.lanemask.gt()
ret i32 %x
}
define ptx_device i32 @test_clock() {
; CHECK: mov.u32 %ret0, %clock;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.clock()
ret i32 %x
}
define ptx_device i64 @test_clock64() {
; CHECK: mov.u64 %ret0, %clock64;
; CHECK: ret;
%x = call i64 @llvm.ptx.read.clock64()
ret i64 %x
}
define ptx_device i32 @test_pm0() {
; CHECK: mov.u32 %ret0, %pm0;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm0()
ret i32 %x
}
define ptx_device i32 @test_pm1() {
; CHECK: mov.u32 %ret0, %pm1;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm1()
ret i32 %x
}
define ptx_device i32 @test_pm2() {
; CHECK: mov.u32 %ret0, %pm2;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm2()
ret i32 %x
}
define ptx_device i32 @test_pm3() {
; CHECK: mov.u32 %ret0, %pm3;
; CHECK: ret;
%x = call i32 @llvm.ptx.read.pm3()
ret i32 %x
}
define ptx_device void @test_bar_sync() {
; CHECK: bar.sync 0
; CHECK: ret;
call void @llvm.ptx.bar.sync(i32 0)
ret void
}
declare i32 @llvm.ptx.read.tid.x()
declare i32 @llvm.ptx.read.tid.y()
declare i32 @llvm.ptx.read.tid.z()
declare i32 @llvm.ptx.read.tid.w()
declare i32 @llvm.ptx.read.ntid.x()
declare i32 @llvm.ptx.read.ntid.y()
declare i32 @llvm.ptx.read.ntid.z()
declare i32 @llvm.ptx.read.ntid.w()
declare i32 @llvm.ptx.read.laneid()
declare i32 @llvm.ptx.read.warpid()
declare i32 @llvm.ptx.read.nwarpid()
declare i32 @llvm.ptx.read.ctaid.x()
declare i32 @llvm.ptx.read.ctaid.y()
declare i32 @llvm.ptx.read.ctaid.z()
declare i32 @llvm.ptx.read.ctaid.w()
declare i32 @llvm.ptx.read.nctaid.x()
declare i32 @llvm.ptx.read.nctaid.y()
declare i32 @llvm.ptx.read.nctaid.z()
declare i32 @llvm.ptx.read.nctaid.w()
declare i32 @llvm.ptx.read.smid()
declare i32 @llvm.ptx.read.nsmid()
declare i32 @llvm.ptx.read.gridid()
declare i32 @llvm.ptx.read.lanemask.eq()
declare i32 @llvm.ptx.read.lanemask.le()
declare i32 @llvm.ptx.read.lanemask.lt()
declare i32 @llvm.ptx.read.lanemask.ge()
declare i32 @llvm.ptx.read.lanemask.gt()
declare i32 @llvm.ptx.read.clock()
declare i64 @llvm.ptx.read.clock64()
declare i32 @llvm.ptx.read.pm0()
declare i32 @llvm.ptx.read.pm1()
declare i32 @llvm.ptx.read.pm2()
declare i32 @llvm.ptx.read.pm3()
declare void @llvm.ptx.bar.sync(i32 %i)

View File

@ -1,382 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
;CHECK: .extern .global .b16 array_i16[10];
@array_i16 = external global [10 x i16]
;CHECK: .extern .const .b16 array_constant_i16[10];
@array_constant_i16 = external addrspace(1) constant [10 x i16]
;CHECK: .extern .shared .b16 array_shared_i16[10];
@array_shared_i16 = external addrspace(4) global [10 x i16]
;CHECK: .extern .global .b32 array_i32[10];
@array_i32 = external global [10 x i32]
;CHECK: .extern .const .b32 array_constant_i32[10];
@array_constant_i32 = external addrspace(1) constant [10 x i32]
;CHECK: .extern .shared .b32 array_shared_i32[10];
@array_shared_i32 = external addrspace(4) global [10 x i32]
;CHECK: .extern .global .b64 array_i64[10];
@array_i64 = external global [10 x i64]
;CHECK: .extern .const .b64 array_constant_i64[10];
@array_constant_i64 = external addrspace(1) constant [10 x i64]
;CHECK: .extern .shared .b64 array_shared_i64[10];
@array_shared_i64 = external addrspace(4) global [10 x i64]
;CHECK: .extern .global .b32 array_float[10];
@array_float = external global [10 x float]
;CHECK: .extern .const .b32 array_constant_float[10];
@array_constant_float = external addrspace(1) constant [10 x float]
;CHECK: .extern .shared .b32 array_shared_float[10];
@array_shared_float = external addrspace(4) global [10 x float]
;CHECK: .extern .global .b64 array_double[10];
@array_double = external global [10 x double]
;CHECK: .extern .const .b64 array_constant_double[10];
@array_constant_double = external addrspace(1) constant [10 x double]
;CHECK: .extern .shared .b64 array_shared_double[10];
@array_shared_double = external addrspace(4) global [10 x double]
define ptx_device i16 @t1_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
%x = load i16* %p
ret i16 %x
}
define ptx_device i32 @t1_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
%x = load i32* %p
ret i32 %x
}
define ptx_device i64 @t1_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
%x = load i64* %p
ret i64 %x
}
define ptx_device float @t1_f32(float* %p) {
entry:
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
%x = load float* %p
ret float %x
}
define ptx_device double @t1_f64(double* %p) {
entry:
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
;CHECK: ret;
%x = load double* %p
ret double %x
}
define ptx_device i16 @t2_u16(i16* %p) {
entry:
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}+2];
;CHECK: ret;
%i = getelementptr i16* %p, i32 1
%x = load i16* %i
ret i16 %x
}
define ptx_device i32 @t2_u32(i32* %p) {
entry:
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
;CHECK: ret;
%i = getelementptr i32* %p, i32 1
%x = load i32* %i
ret i32 %x
}
define ptx_device i64 @t2_u64(i64* %p) {
entry:
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
;CHECK: ret;
%i = getelementptr i64* %p, i32 1
%x = load i64* %i
ret i64 %x
}
define ptx_device float @t2_f32(float* %p) {
entry:
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}+4];
;CHECK: ret;
%i = getelementptr float* %p, i32 1
%x = load float* %i
ret float %x
}
define ptx_device double @t2_f64(double* %p) {
entry:
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}+8];
;CHECK: ret;
%i = getelementptr double* %p, i32 1
%x = load double* %i
ret double %x
}
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
%i = getelementptr i16* %p, i32 %q
%x = load i16* %i
ret i16 %x
}
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
%i = getelementptr i32* %p, i32 %q
%x = load i32* %i
ret i32 %x
}
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
%i = getelementptr i64* %p, i32 %q
%x = load i64* %i
ret i64 %x
}
define ptx_device float @t3_f32(float* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
%i = getelementptr float* %p, i32 %q
%x = load float* %i
ret float %x
}
define ptx_device double @t3_f64(double* %p, i32 %q) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r{{[0-9]+}}];
%i = getelementptr double* %p, i32 %q
%x = load double* %i
ret double %x
}
define ptx_device i16 @t4_global_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
%x = load i16* %i
ret i16 %x
}
define ptx_device i32 @t4_global_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
%x = load i32* %i
ret i32 %x
}
define ptx_device i64 @t4_global_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
%x = load i64* %i
ret i64 %x
}
define ptx_device float @t4_global_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
%x = load float* %i
ret float %x
}
define ptx_device double @t4_global_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
%x = load double* %i
ret double %x
}
define ptx_device i16 @t4_const_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i16;
;CHECK: ld.const.u16 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
%x = load i16 addrspace(1)* %i
ret i16 %x
}
define ptx_device i32 @t4_const_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i32;
;CHECK: ld.const.u32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
%x = load i32 addrspace(1)* %i
ret i32 %x
}
define ptx_device i64 @t4_const_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_i64;
;CHECK: ld.const.u64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
%x = load i64 addrspace(1)* %i
ret i64 %x
}
define ptx_device float @t4_const_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_float;
;CHECK: ld.const.f32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
%x = load float addrspace(1)* %i
ret float %x
}
define ptx_device double @t4_const_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_constant_double;
;CHECK: ld.const.f64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
%x = load double addrspace(1)* %i
ret double %x
}
define ptx_device i16 @t4_shared_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
;CHECK: ld.shared.u16 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
%x = load i16 addrspace(4)* %i
ret i16 %x
}
define ptx_device i32 @t4_shared_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
;CHECK: ld.shared.u32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
%x = load i32 addrspace(4)* %i
ret i32 %x
}
define ptx_device i64 @t4_shared_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
;CHECK: ld.shared.u64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
%x = load i64 addrspace(4)* %i
ret i64 %x
}
define ptx_device float @t4_shared_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
;CHECK: ld.shared.f32 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
%x = load float addrspace(4)* %i
ret float %x
}
define ptx_device double @t4_shared_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
;CHECK: ld.shared.f64 %ret{{[0-9]+}}, [%r[[R0]]];
;CHECK: ret;
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
%x = load double addrspace(4)* %i
ret double %x
}
define ptx_device i16 @t5_u16() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
;CHECK: ld.global.u16 %ret{{[0-9]+}}, [%r[[R0]]+2];
;CHECK: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
%x = load i16* %i
ret i16 %x
}
define ptx_device i32 @t5_u32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
;CHECK: ld.global.u32 %ret{{[0-9]+}}, [%r[[R0]]+4];
;CHECK: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
%x = load i32* %i
ret i32 %x
}
define ptx_device i64 @t5_u64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
;CHECK: ld.global.u64 %ret{{[0-9]+}}, [%r[[R0]]+8];
;CHECK: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
%x = load i64* %i
ret i64 %x
}
define ptx_device float @t5_f32() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
;CHECK: ld.global.f32 %ret{{[0-9]+}}, [%r[[R0]]+4];
;CHECK: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
%x = load float* %i
ret float %x
}
define ptx_device double @t5_f64() {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
;CHECK: ld.global.f64 %ret{{[0-9]+}}, [%r[[R0]]+8];
;CHECK: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
%x = load double* %i
ret double %x
}

View File

@ -1,6 +0,0 @@
config.suffixes = ['.ll', '.c', '.cpp']
targets = set(config.root.targets_to_build.split())
if not 'PTX' in targets:
config.unsupported = True

View File

@ -1,56 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=+ptx20 | FileCheck %s
define ptx_device float @test_sqrt_f32(float %x) {
entry:
; CHECK: sqrt.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%y = call float @llvm.sqrt.f32(float %x)
ret float %y
}
define ptx_device double @test_sqrt_f64(double %x) {
entry:
; CHECK: sqrt.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%y = call double @llvm.sqrt.f64(double %x)
ret double %y
}
define ptx_device float @test_sin_f32(float %x) {
entry:
; CHECK: sin.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%y = call float @llvm.sin.f32(float %x)
ret float %y
}
define ptx_device double @test_sin_f64(double %x) {
entry:
; CHECK: sin.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%y = call double @llvm.sin.f64(double %x)
ret double %y
}
define ptx_device float @test_cos_f32(float %x) {
entry:
; CHECK: cos.approx.f32 %ret{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%y = call float @llvm.cos.f32(float %x)
ret float %y
}
define ptx_device double @test_cos_f64(double %x) {
entry:
; CHECK: cos.approx.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%y = call double @llvm.cos.f64(double %x)
ret double %y
}
declare float @llvm.sqrt.f32(float)
declare double @llvm.sqrt.f64(double)
declare float @llvm.sin.f32(float)
declare double @llvm.sin.f64(double)
declare float @llvm.cos.f32(float)
declare double @llvm.cos.f64(double)

View File

@ -1,24 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
; RUN: llc < %s -march=ptx32 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s -check-prefix=FMA
; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20,+no-fma | FileCheck %s -check-prefix=MUL
define ptx_device float @test_mul_add_f(float %x, float %y, float %z) {
entry:
; FMA: mad.rn.f32
; MUL: mul.rn.f32
; MUL: add.rn.f32
%a = fmul float %x, %y
%b = fadd float %a, %z
ret float %b
}
define ptx_device double @test_mul_add_d(double %x, double %y, double %z) {
entry:
; FMA: mad.rn.f64
; MUL: mul.rn.f64
; MUL: add.rn.f64
%a = fmul double %x, %y
%b = fadd double %a, %z
ret double %b
}

View File

@ -1,17 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=+sm13 | FileCheck %s
define ptx_device float @t1_f32(float %x, float %y, float %z) {
; CHECK: mad.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};
; CHECK: ret;
%a = fmul float %x, %y
%b = fadd float %a, %z
ret float %b
}
define ptx_device double @t1_f64(double %x, double %y, double %z) {
; CHECK: mad.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};
; CHECK: ret;
%a = fmul double %x, %y
%b = fadd double %a, %z
ret double %b
}

View File

@ -1,62 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Constant-return family: each function returns a zero constant of one type
; (i16, i32, i64, float, double). The expected output is a mov of an immediate
; into the return register followed by ret. The integer cases expect the
; immediate 0; both floating-point cases expect the 0D0000000000000000 literal.
define ptx_device i16 @t1_u16() {
; CHECK: mov.u16 %ret{{[0-9]+}}, 0;
; CHECK: ret;
ret i16 0
}
define ptx_device i32 @t1_u32() {
; CHECK: mov.u32 %ret{{[0-9]+}}, 0;
; CHECK: ret;
ret i32 0
}
define ptx_device i64 @t1_u64() {
; CHECK: mov.u64 %ret{{[0-9]+}}, 0;
; CHECK: ret;
ret i64 0
}
define ptx_device float @t1_f32() {
; CHECK: mov.f32 %ret{{[0-9]+}}, 0D0000000000000000;
; CHECK: ret;
ret float 0.0
}
define ptx_device double @t1_f64() {
; CHECK: mov.f64 %ret{{[0-9]+}}, 0D0000000000000000;
; CHECK: ret;
ret double 0.0
}
; Pass-through family: each function returns its single argument unchanged.
; The integer cases (t2_*) expect an untyped-bits move (mov.b16/b32/b64) from
; an %arg register into the return register; the floating-point cases (t3_*)
; expect mov.f32/mov.f64 from an %arg register.
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: mov.b16 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i16 %x
}
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: mov.b32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i32 %x
}
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: mov.b64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret i64 %x
}
define ptx_device float @t3_f32(float %x) {
; CHECK: mov.f32 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret float %x
}
define ptx_device double @t3_f64(double %x) {
; CHECK: mov.f64 %ret{{[0-9]+}}, %arg{{[0-9]+}};
; CHECK: ret;
ret double %x
}

View File

@ -1,39 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
;define ptx_device i32 @t1(i32 %x, i32 %y) {
; %z = mul i32 %x, %y
; ret i32 %z
;}
;define ptx_device i32 @t2(i32 %x) {
; %z = mul i32 %x, 1
; ret i32 %z
;}
; Floating-point multiply, register by register: expects mul.rn.f32 with two
; register operands.
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret;
  %product = fmul float %x, %y
  ret float %product
}
; Same as above for double: expects mul.rn.f64 with two register operands.
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
; CHECK: ret;
  %product = fmul double %x, %y
  ret double %product
}
; Multiply by the constant 5.0: the constant is expected as the immediate
; operand 0D4014000000000000.
define ptx_device float @t2_f32(float %x) {
; CHECK: mul.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0D4014000000000000;
; CHECK: ret;
  %scaled = fmul float %x, 5.0
  ret float %scaled
}
; Double-precision multiply by 5.0 with the same expected immediate encoding.
define ptx_device double @t2_f64(double %x) {
; CHECK: mul.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0D4014000000000000;
; CHECK: ret;
  %scaled = fmul double %x, 5.0
  ret double %scaled
}

View File

@ -1,13 +0,0 @@
; Confirms that the -march and -mattr selections show up in the directives llc
; emits: the PTX version, the target SM level, and the address size.
; Each configuration is verified with FileCheck under its own prefix, so the
; expected text is matched literally (grep treated every "." as a wildcard)
; and the file follows the same convention as the other tests.
; RUN: llc < %s -march=ptx32 -mattr=ptx20 | FileCheck %s -check-prefix=PTX20
; RUN: llc < %s -march=ptx32 -mattr=ptx21 | FileCheck %s -check-prefix=PTX21
; RUN: llc < %s -march=ptx32 -mattr=ptx22 | FileCheck %s -check-prefix=PTX22
; RUN: llc < %s -march=ptx32 -mattr=ptx23 | FileCheck %s -check-prefix=PTX23
; RUN: llc < %s -march=ptx32 -mattr=sm10 | FileCheck %s -check-prefix=SM10
; RUN: llc < %s -march=ptx32 -mattr=sm13 | FileCheck %s -check-prefix=SM13
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s -check-prefix=SM20
; RUN: llc < %s -march=ptx32 -mattr=ptx23 | FileCheck %s -check-prefix=ADDR32
; RUN: llc < %s -march=ptx64 -mattr=ptx23 | FileCheck %s -check-prefix=ADDR64
; PTX20: .version 2.0
; PTX21: .version 2.1
; PTX22: .version 2.2
; PTX23: .version 2.3
; SM10: .target sm_10
; SM13: .target sm_13
; SM20: .target sm_20
; ADDR32: .address_size 32
; ADDR64: .address_size 64
; Minimal device function so that llc has something to emit.
define ptx_device void @t1() {
  ret void
}

View File

@ -1,8 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; CHECK: .func (.reg .b32 %ret{{[0-9]+}}) test_parameter_order (.reg .f32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .b32 %arg{{[0-9]+}}, .reg .f32 %arg{{[0-9]+}})
; Takes a float, two i32 values and another float, and returns the difference
; of the two i32 arguments. Only the integer subtraction is checked inside the
; body.
define ptx_device i32 @test_parameter_order(float %a, i32 %b, i32 %c, float %d) {
; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
  %difference = sub i32 %b, %c
  ret i32 %difference
}

View File

@ -1,25 +0,0 @@
; RUN: llc < %s -march=ptx64 -mattr=+ptx20,+sm20 | FileCheck %s
declare i32 @printf(i8*, ...)
@str = private unnamed_addr constant [6 x i8] c"test\0A\00"
; Calls printf with only the format string @str and discards the result.
; The expected output loads the string address, performs a uniform call to
; vprintf with two local parameters, and returns.
define ptx_device void @t1_printf() {
; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str;
; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
; CHECK: ret;
  %status = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x i8]* @str, i64 0, i64 0))
  ret void
}
@str2 = private unnamed_addr constant [11 x i8] c"test = %f\0A\00"
; Calls printf with the format string @str2 and one double argument.
; The expected output declares an 8-byte-aligned local buffer, loads the string
; address, converts the local buffer address with cvta.local, performs a
; uniform call to vprintf, and returns.
define ptx_device void @t2_printf() {
; CHECK: .local .align 8 .b8 __local{{[0-9]+}}[{{[0-9]+}}];
; CHECK: mov.u64 %rd{{[0-9]+}}, $L__str2;
; The buffer index is one or more digits; the earlier character class with the
; plus sign inside the brackets matched only a single character.
; CHECK: cvta.local.u64 %rd{{[0-9]+}}, __local{{[0-9]+}};
; CHECK: call.uni (__localparam_{{[0-9]+}}), vprintf, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
; CHECK: ret;
  %1 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([11 x i8]* @str2, i64 0, i64 0), double 0x3FF3333340000000)
  ret void
}

View File

@ -1,7 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Empty device function: the output must contain a ret instruction and must
; not contain an exit instruction after it.
define ptx_device void @t1() {
; CHECK: ret;
; CHECK-NOT: exit;
ret void
}

View File

@ -1,25 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; select on an i1 between two i32 values: expects selp.u32 with two value
; registers and a predicate register.
define ptx_device i32 @test_selp_i32(i1 %x, i32 %y, i32 %z) {
; CHECK: selp.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}, %p{{[0-9]+}};
  %chosen = select i1 %x, i32 %y, i32 %z
  ret i32 %chosen
}
; i64 variant: expects selp.u64 on %rd registers.
define ptx_device i64 @test_selp_i64(i1 %x, i64 %y, i64 %z) {
; CHECK: selp.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}}, %p{{[0-9]+}};
  %chosen = select i1 %x, i64 %y, i64 %z
  ret i64 %chosen
}
; float variant: expects selp.f32 on %f registers.
define ptx_device float @test_selp_f32(i1 %x, float %y, float %z) {
; CHECK: selp.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %p{{[0-9]+}};
  %chosen = select i1 %x, float %y, float %z
  ret float %chosen
}
; double variant: expects selp.f64 on %fd registers.
define ptx_device double @test_selp_f64(i1 %x, double %y, double %z) {
; CHECK: selp.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %p{{[0-9]+}};
  %chosen = select i1 %x, double %y, double %z
  ret double %chosen
}

View File

@ -1,206 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Register-register comparison family. Each function compares its two i32
; arguments with one icmp predicate, zero-extends the i1 result to i32 and
; returns it. The expected output is a setp using the corresponding comparison
; (unsigned predicates as .u32, signed predicates as .s32) that defines a
; predicate register, followed by a selp.u32 that produces 1 or 0 from that
; same predicate register (captured as P0), and a ret.
define ptx_device i32 @test_setp_eq_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp eq i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_ne_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ne i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_lt_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ult i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_le_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.le.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ule i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_gt_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ugt i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_ge_u32_rr(i32 %x, i32 %y) {
; CHECK: setp.ge.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp uge i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_lt_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp slt i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_le_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.le.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp sle i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_gt_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp sgt i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_ge_s32_rr(i32 %x, i32 %y) {
; CHECK: setp.ge.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp sge i32 %x, %y
%z = zext i1 %p to i32
ret i32 %z
}
; Register-immediate comparison family. Each function compares its i32
; argument against the constant 1, zero-extends the i1 result and returns it.
; The expected setp does not always mirror the IR predicate and constant: the
; expectations below show "ult 1" as eq 0, "ule 1" as lt.u32 2, "uge 1" as
; ne 0, "sle 1" as lt.s32 2 and "sge 1" as gt.s32 0. The eq, ne, ugt, slt and
; sgt cases keep the predicate and the immediate 1.
define ptx_device i32 @test_setp_eq_u32_ri(i32 %x) {
; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp eq i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_ne_u32_ri(i32 %x) {
; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ne i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_lt_u32_ri(i32 %x) {
; CHECK: setp.eq.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ult i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_le_u32_ri(i32 %x) {
; CHECK: setp.lt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ule i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_gt_u32_ri(i32 %x) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp ugt i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_ge_u32_ri(i32 %x) {
; CHECK: setp.ne.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp uge i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_lt_s32_ri(i32 %x) {
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp slt i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_le_s32_ri(i32 %x) {
; CHECK: setp.lt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 2;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp sle i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_gt_s32_ri(i32 %x) {
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 1;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp sgt i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
define ptx_device i32 @test_setp_ge_s32_ri(i32 %x) {
; CHECK: setp.gt.s32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P0]];
; CHECK: ret;
%p = icmp sge i32 %x, 1
%z = zext i1 %p to i32
ret i32 %z
}
; Combines two comparisons with an and: (x == y) & (u >u v). The expected
; output computes the ugt comparison into a predicate (P0) and then uses the
; four-operand setp.eq.and form, which takes P0 as its extra source operand.
; The combined predicate is captured as P1 instead of being pinned to a
; literal register number, matching how P0 is handled throughout this file.
define ptx_device i32 @test_setp_4_op_format_1(i32 %x, i32 %y, i32 %u, i32 %v) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: setp.eq.and.u32 %p[[P1:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}, %p[[P0]];
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P1]];
; CHECK: ret;
  %c = icmp eq i32 %x, %y
  %d = icmp ugt i32 %u, %v
  %e = and i1 %c, %d
  %z = zext i1 %e to i32
  ret i32 %z
}
; Combines (x == y) with the inverted truncation of %w to i1. The expected
; output first sets a predicate (P0) from a gt-against-0 comparison and then
; uses the four-operand setp.eq.and form with the negated predicate (!P0).
; The combined predicate is captured as P1 instead of being pinned to a
; literal register number, matching how P0 is handled throughout this file.
define ptx_device i32 @test_setp_4_op_format_2(i32 %x, i32 %y, i32 %w) {
; CHECK: setp.gt.u32 %p[[P0:[0-9]+]], %r{{[0-9]+}}, 0;
; CHECK: setp.eq.and.u32 %p[[P1:[0-9]+]], %r{{[0-9]+}}, %r{{[0-9]+}}, !%p[[P0]];
; CHECK: selp.u32 %ret{{[0-9]+}}, 1, 0, %p[[P1]];
; CHECK: ret;
  %c = trunc i32 %w to i1
  %d = icmp eq i32 %x, %y
  %e = xor i1 %c, 1
  %f = and i1 %d, %e
  %z = zext i1 %f to i32
  ret i32 %z
}

View File

@ -1,22 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Left shift of one register by another: expects shl.b32 with two register
; operands.
define ptx_device i32 @t1(i32 %x, i32 %y) {
; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret;
  %shifted = shl i32 %x, %y
  ret i32 %shifted
}
; Left shift of a register by the constant 3: the shift amount is expected as
; an immediate.
define ptx_device i32 @t2(i32 %x) {
; CHECK: shl.b32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
; CHECK: ret;
  %shifted = shl i32 %x, 3
  ret i32 %shifted
}
; Left shift of the constant 3 by a register: the shifted value is expected as
; an immediate.
define ptx_device i32 @t3(i32 %x) {
; CHECK: shl.b32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
; CHECK: ret;
  %shifted = shl i32 3, %x
  ret i32 %shifted
}

View File

@ -1,43 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Logical right shift, register by register: expects shr.u32.
define ptx_device i32 @t1(i32 %x, i32 %y) {
; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret;
  %shifted = lshr i32 %x, %y
  ret i32 %shifted
}
; Logical right shift by the constant 3: shift amount expected as an immediate.
define ptx_device i32 @t2(i32 %x) {
; CHECK: shr.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
; CHECK: ret;
  %shifted = lshr i32 %x, 3
  ret i32 %shifted
}
; Logical right shift of the constant 3 by a register: shifted value expected
; as an immediate.
define ptx_device i32 @t3(i32 %x) {
; CHECK: shr.u32 %ret{{[0-9]+}}, 3, %r{{[0-9]+}}
; CHECK: ret;
  %shifted = lshr i32 3, %x
  ret i32 %shifted
}
; Arithmetic right shift, register by register: expects shr.s32.
define ptx_device i32 @t4(i32 %x, i32 %y) {
; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}}
; CHECK: ret;
  %shifted = ashr i32 %x, %y
  ret i32 %shifted
}
; Arithmetic right shift by the constant 3: shift amount expected as an
; immediate.
define ptx_device i32 @t5(i32 %x) {
; CHECK: shr.s32 %ret{{[0-9]+}}, %r{{[0-9]+}}, 3
; CHECK: ret;
  %shifted = ashr i32 %x, 3
  ret i32 %shifted
}
; Arithmetic right shift of the constant -3 by a register: shifted value
; expected as an immediate.
define ptx_device i32 @t6(i32 %x) {
; CHECK: shr.s32 %ret{{[0-9]+}}, -3, %r{{[0-9]+}}
; CHECK: ret;
  %shifted = ashr i32 -3, %x
  ret i32 %shifted
}

View File

@ -1,27 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
; Void device function taking two floats. The fadd result %z is never used;
; only the presence of a ret is checked. This function is the callee of
; test_call.
define ptx_device void @test_add(float %x, float %y) {
; CHECK: ret;
%z = fadd float %x, %y
ret void
}
; Adds the two arguments, passes the sum and %y to the void function test_add,
; and returns the sum. Expects a uniform call with two local parameters and no
; return parameter.
define ptx_device float @test_call(float %x, float %y) {
; CHECK: call.uni test_add, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}});
  %sum = fadd float %x, %y
  call void @test_add(float %sum, float %y)
  ret float %sum
}
; Returns the sum of its two float arguments; only the ret is checked. This
; function is the callee of test_call_compute.
define ptx_device float @test_compute(float %x, float %y) {
; CHECK: ret;
  %sum = fadd float %x, %y
  ret float %sum
}
; Forwards both arguments to test_compute and returns its result. Expects a
; uniform call with one local return parameter and two local input parameters.
define ptx_device float @test_call_compute(float %x, float %y) {
; CHECK: call.uni (__localparam_{{[0-9]+}}), test_compute, (__localparam_{{[0-9]+}}, __localparam_{{[0-9]+}})
  %result = call float @test_compute(float %x, float %y)
  ret float %result
}

View File

@ -1,337 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; External ten-element arrays for each element type (i16, i32, i64, float,
; double), each declared three ways. The expectations show how each form is
; emitted: a global in the default address space as ".extern .global", an
; addrspace(1) constant as ".extern .const", and an addrspace(4) global as
; ".extern .shared". Float arrays are expected as .b32 and double arrays as
; .b64.
;CHECK: .extern .global .b16 array_i16[10];
@array_i16 = external global [10 x i16]
;CHECK: .extern .const .b16 array_constant_i16[10];
@array_constant_i16 = external addrspace(1) constant [10 x i16]
;CHECK: .extern .shared .b16 array_shared_i16[10];
@array_shared_i16 = external addrspace(4) global [10 x i16]
;CHECK: .extern .global .b32 array_i32[10];
@array_i32 = external global [10 x i32]
;CHECK: .extern .const .b32 array_constant_i32[10];
@array_constant_i32 = external addrspace(1) constant [10 x i32]
;CHECK: .extern .shared .b32 array_shared_i32[10];
@array_shared_i32 = external addrspace(4) global [10 x i32]
;CHECK: .extern .global .b64 array_i64[10];
@array_i64 = external global [10 x i64]
;CHECK: .extern .const .b64 array_constant_i64[10];
@array_constant_i64 = external addrspace(1) constant [10 x i64]
;CHECK: .extern .shared .b64 array_shared_i64[10];
@array_shared_i64 = external addrspace(4) global [10 x i64]
;CHECK: .extern .global .b32 array_float[10];
@array_float = external global [10 x float]
;CHECK: .extern .const .b32 array_constant_float[10];
@array_constant_float = external addrspace(1) constant [10 x float]
;CHECK: .extern .shared .b32 array_shared_float[10];
@array_shared_float = external addrspace(4) global [10 x float]
;CHECK: .extern .global .b64 array_double[10];
@array_double = external global [10 x double]
;CHECK: .extern .const .b64 array_constant_double[10];
@array_constant_double = external addrspace(1) constant [10 x double]
;CHECK: .extern .shared .b64 array_shared_double[10];
@array_shared_double = external addrspace(4) global [10 x double]
; Direct-store family: each function stores its value argument through its
; pointer argument, one function per element type. The expected output is an
; st.global of the matching type whose address is a bare register, followed by
; ret.
define ptx_device void @t1_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
;CHECK: ret;
store i16 %x, i16* %p
ret void
}
define ptx_device void @t1_u32(i32* %p, i32 %x) {
entry:
;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
;CHECK: ret;
store i32 %x, i32* %p
ret void
}
define ptx_device void @t1_u64(i64* %p, i64 %x) {
entry:
;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
;CHECK: ret;
store i64 %x, i64* %p
ret void
}
define ptx_device void @t1_f32(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
;CHECK: ret;
store float %x, float* %p
ret void
}
define ptx_device void @t1_f64(double* %p, double %x) {
entry:
;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
;CHECK: ret;
store double %x, double* %p
ret void
}
; Constant-offset family: each function stores to element 1 past its pointer
; argument. The expected st.global address is the base register plus a
; constant byte offset equal to the element size (+2 for i16, +4 for i32 and
; float, +8 for i64 and double).
define ptx_device void @t2_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [%r{{[0-9]+}}+2], %rh{{[0-9]+}};
;CHECK: ret;
%i = getelementptr i16* %p, i32 1
store i16 %x, i16* %i
ret void
}
define ptx_device void @t2_u32(i32* %p, i32 %x) {
entry:
;CHECK: st.global.u32 [%r{{[0-9]+}}+4], %r{{[0-9]+}};
;CHECK: ret;
%i = getelementptr i32* %p, i32 1
store i32 %x, i32* %i
ret void
}
define ptx_device void @t2_u64(i64* %p, i64 %x) {
entry:
;CHECK: st.global.u64 [%r{{[0-9]+}}+8], %rd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr i64* %p, i32 1
store i64 %x, i64* %i
ret void
}
define ptx_device void @t2_f32(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [%r{{[0-9]+}}+4], %f{{[0-9]+}};
;CHECK: ret;
%i = getelementptr float* %p, i32 1
store float %x, float* %i
ret void
}
define ptx_device void @t2_f64(double* %p, double %x) {
entry:
;CHECK: st.global.f64 [%r{{[0-9]+}}+8], %fd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr double* %p, i32 1
store double %x, double* %i
ret void
}
; Variable-index family: each function stores to element %q past its pointer
; argument. The expected output scales the index with a shl.b32 (by 1 for
; i16, 2 for i32 and float, 3 for i64 and double), adds the scaled index
; (captured as R0) with add.u32, and then stores through a bare register.
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 1;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: st.global.u16 [%r{{[0-9]+}}], %rh{{[0-9]+}};
;CHECK: ret;
%i = getelementptr i16* %p, i32 %q
store i16 %x, i16* %i
ret void
}
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: st.global.u32 [%r{{[0-9]+}}], %r{{[0-9]+}};
;CHECK: ret;
%i = getelementptr i32* %p, i32 %q
store i32 %x, i32* %i
ret void
}
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: st.global.u64 [%r{{[0-9]+}}], %rd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr i64* %p, i32 %q
store i64 %x, i64* %i
ret void
}
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 2;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: st.global.f32 [%r{{[0-9]+}}], %f{{[0-9]+}};
;CHECK: ret;
%i = getelementptr float* %p, i32 %q
store float %x, float* %i
ret void
}
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
entry:
;CHECK: shl.b32 %r[[R0:[0-9]+]], %r{{[0-9]+}}, 3;
;CHECK: add.u32 %r{{[0-9]+}}, %r{{[0-9]+}}, %r[[R0]];
;CHECK: st.global.f64 [%r{{[0-9]+}}], %fd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr double* %p, i32 %q
store double %x, double* %i
ret void
}
; Stores the i16 argument to element 0 of the external global @array_i16.
; The expected output moves the array symbol into a register (captured as R0)
; and stores through that register with st.global.u16.
; The getelementptr indices use i32, like every other function in this file
; that indexes one of the external arrays.
define ptx_device void @t4_global_u16(i16 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
;CHECK: st.global.u16 [%r[[R0]]], %rh{{[0-9]+}};
;CHECK: ret;
  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
  store i16 %x, i16* %i
  ret void
}
; Global-array family: each function stores its argument to element 0 of the
; external global array of the matching type. The expected output moves the
; array symbol into a register (captured as R0) with mov.u32 and stores
; through that same register with st.global.
define ptx_device void @t4_global_u32(i32 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
;CHECK: st.global.u32 [%r[[R0]]], %r{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
store i32 %x, i32* %i
ret void
}
define ptx_device void @t4_global_u64(i64 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
;CHECK: st.global.u64 [%r[[R0]]], %rd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
store i64 %x, i64* %i
ret void
}
define ptx_device void @t4_global_f32(float %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
;CHECK: st.global.f32 [%r[[R0]]], %f{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
store float %x, float* %i
ret void
}
define ptx_device void @t4_global_f64(double %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
;CHECK: st.global.f64 [%r[[R0]]], %fd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
store double %x, double* %i
ret void
}
; Shared-array family: each function stores its argument to element 0 of the
; external addrspace(4) array of the matching type. The expected output moves
; the array symbol into a register (captured as R0) and stores through it with
; st.shared rather than st.global.
define ptx_device void @t4_shared_u16(i16 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i16;
;CHECK: st.shared.u16 [%r[[R0]]], %rh{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
store i16 %x, i16 addrspace(4)* %i
ret void
}
define ptx_device void @t4_shared_u32(i32 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i32;
;CHECK: st.shared.u32 [%r[[R0]]], %r{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
store i32 %x, i32 addrspace(4)* %i
ret void
}
define ptx_device void @t4_shared_u64(i64 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_i64;
;CHECK: st.shared.u64 [%r[[R0]]], %rd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
store i64 %x, i64 addrspace(4)* %i
ret void
}
define ptx_device void @t4_shared_f32(float %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_float;
;CHECK: st.shared.f32 [%r[[R0]]], %f{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
store float %x, float addrspace(4)* %i
ret void
}
define ptx_device void @t4_shared_f64(double %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_shared_double;
;CHECK: st.shared.f64 [%r[[R0]]], %fd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
store double %x, double addrspace(4)* %i
ret void
}
; Global-array-with-offset family: each function stores its argument to
; element 1 of the external global array of the matching type. The expected
; output moves the array symbol into a register (captured as R0) and stores to
; that register plus a constant byte offset equal to the element size (+2 for
; i16, +4 for i32 and float, +8 for i64 and double).
define ptx_device void @t5_u16(i16 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i16;
;CHECK: st.global.u16 [%r[[R0]]+2], %rh{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
store i16 %x, i16* %i
ret void
}
define ptx_device void @t5_u32(i32 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i32;
;CHECK: st.global.u32 [%r[[R0]]+4], %r{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
store i32 %x, i32* %i
ret void
}
define ptx_device void @t5_u64(i64 %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_i64;
;CHECK: st.global.u64 [%r[[R0]]+8], %rd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
store i64 %x, i64* %i
ret void
}
define ptx_device void @t5_f32(float %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_float;
;CHECK: st.global.f32 [%r[[R0]]+4], %f{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
store float %x, float* %i
ret void
}
define ptx_device void @t5_f64(double %x) {
entry:
;CHECK: mov.u32 %r[[R0:[0-9]+]], array_double;
;CHECK: st.global.f64 [%r[[R0]]+8], %fd{{[0-9]+}};
;CHECK: ret;
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
store double %x, double* %i
ret void
}

View File

@ -1,19 +0,0 @@
; RUN: llc < %s -march=ptx32 -mattr=sm20 | FileCheck %s
; Spills the float argument to a 4-byte-aligned stack slot and reloads it.
; Expects a 4-byte .local declaration with alignment 4 and an st.local.f32
; into that same slot. The slot index is captured (SLOT) and the source
; register number is pattern-matched, so the test does not depend on the exact
; numbering chosen by the back-end.
define ptx_device float @stack1(float %a) {
; CHECK: .local .align 4 .b8 __local[[SLOT:[0-9]+]][4];
  %a.2 = alloca float, align 4
; CHECK: st.local.f32 [__local[[SLOT]]], %f{{[0-9]+}}
  store float %a, float* %a.2
  %a.3 = load float* %a.2
  ret float %a.3
}
; Same as stack1 but the alloca requests 8-byte alignment: expects the .local
; declaration to carry ".align 8" while the slot size stays 4 bytes. The slot
; index is captured (SLOT) and the source register number is pattern-matched,
; so the test does not depend on the exact numbering chosen by the back-end.
define ptx_device float @stack1_align8(float %a) {
; CHECK: .local .align 8 .b8 __local[[SLOT:[0-9]+]][4];
  %a.2 = alloca float, align 8
; CHECK: st.local.f32 [__local[[SLOT]]], %f{{[0-9]+}}
  store float %a, float* %a.2
  %a.3 = load float* %a.2
  ret float %a.3
}

View File

@ -1,71 +0,0 @@
; RUN: llc < %s -march=ptx32 | FileCheck %s
; Register-register subtraction family, one function per type. The integer
; cases expect sub.u16/u32/u64; the floating-point cases expect the
; round-to-nearest forms sub.rn.f32 and sub.rn.f64.
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
; CHECK: sub.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, %rh{{[0-9]+}};
; CHECK: ret;
%z = sub i16 %x, %y
ret i16 %z
}
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
; CHECK: sub.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, %r{{[0-9]+}};
; CHECK: ret;
%z = sub i32 %x, %y
ret i32 %z
}
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
; CHECK: sub.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, %rd{{[0-9]+}};
; CHECK: ret;
%z = sub i64 %x, %y
ret i64 %z
}
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: sub.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}
; CHECK: ret;
%z = fsub float %x, %y
ret float %z
}
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: sub.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}
; CHECK: ret;
%z = fsub double %x, %y
ret double %z
}
; Subtract-constant family: each function subtracts 1 (or 1.0) from its
; argument. The expected output is an add of the negated constant rather than
; a sub: the integer cases expect an add with the immediate -1, and the
; floating-point cases expect add.rn with the immediate 0DBFF0000000000000
; (the double-precision bit pattern of -1.0).
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: add.u16 %ret{{[0-9]+}}, %rh{{[0-9]+}}, -1;
; CHECK: ret;
%z = sub i16 %x, 1
ret i16 %z
}
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: add.u32 %ret{{[0-9]+}}, %r{{[0-9]+}}, -1;
; CHECK: ret;
%z = sub i32 %x, 1
ret i32 %z
}
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: add.u64 %ret{{[0-9]+}}, %rd{{[0-9]+}}, -1;
; CHECK: ret;
%z = sub i64 %x, 1
ret i64 %z
}
define ptx_device float @t2_f32(float %x) {
; CHECK: add.rn.f32 %ret{{[0-9]+}}, %f{{[0-9]+}}, 0DBFF0000000000000;
; CHECK: ret;
%z = fsub float %x, 1.0
ret float %z
}
define ptx_device double @t2_f64(double %x) {
; CHECK: add.rn.f64 %ret{{[0-9]+}}, %fd{{[0-9]+}}, 0DBFF0000000000000;
; CHECK: ret;
%z = fsub double %x, 1.0
ret double %z
}

View File

@ -353,9 +353,9 @@ TEST(TripleTest, BitWidthArchVariants) {
EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch());
EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch());
T.setArch(Triple::ptx32);
EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch());
EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch());
T.setArch(Triple::nvptx);
EXPECT_EQ(Triple::nvptx, T.get32BitArchVariant().getArch());
EXPECT_EQ(Triple::nvptx64, T.get64BitArchVariant().getArch());
T.setArch(Triple::sparc);
EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch());
@ -377,9 +377,9 @@ TEST(TripleTest, BitWidthArchVariants) {
EXPECT_EQ(Triple::ppc, T.get32BitArchVariant().getArch());
EXPECT_EQ(Triple::ppc64, T.get64BitArchVariant().getArch());
T.setArch(Triple::ptx64);
EXPECT_EQ(Triple::ptx32, T.get32BitArchVariant().getArch());
EXPECT_EQ(Triple::ptx64, T.get64BitArchVariant().getArch());
T.setArch(Triple::nvptx64);
EXPECT_EQ(Triple::nvptx, T.get32BitArchVariant().getArch());
EXPECT_EQ(Triple::nvptx64, T.get64BitArchVariant().getArch());
T.setArch(Triple::sparcv9);
EXPECT_EQ(Triple::sparc, T.get32BitArchVariant().getArch());