mirror of
https://github.com/RPCSX/llvm.git
synced 2025-04-03 16:51:42 +00:00
Extend initial support for primitive types in PTX backend
- Allow i16, i32, i64, float, and double types, using the native .u16, .u32, .u64, .f32, and .f64 PTX types.
- Allow loading/storing of all primitive types.
- Allow primitive types to be passed as parameters.
- Allow selection of PTX Version and Shader Model as sub-target attributes.
- Merge integer/floating-point test cases for load/store.
- Use .u32 instead of .s32 to conform to output from the NVIDIA nvcc compiler.
Patch by Justin Holewinski.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@126824 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
9ff5de99df
commit
fd8978b021
@ -19,8 +19,34 @@ include "llvm/Target/Target.td"
|
||||
// Subtarget Features.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def FeatureSM20 : SubtargetFeature<"sm20", "is_sm20", "true",
|
||||
"Enable sm_20 target architecture">;
|
||||
//===- Architectural Features ---------------------------------------------===//
|
||||
|
||||
def FeatureDouble : SubtargetFeature<"double", "SupportsDouble", "true",
|
||||
"Do not demote .f64 to .f32">;
|
||||
|
||||
//===- PTX Version --------------------------------------------------------===//
|
||||
|
||||
def FeaturePTX14 : SubtargetFeature<"ptx14", "PTXVersion", "PTX_VERSION_1_4",
|
||||
"Use PTX Language Version 1.4">;
|
||||
|
||||
def FeaturePTX20 : SubtargetFeature<"ptx20", "PTXVersion", "PTX_VERSION_2_0",
|
||||
"Use PTX Language Version 2.0",
|
||||
[FeaturePTX14]>;
|
||||
|
||||
def FeaturePTX21 : SubtargetFeature<"ptx21", "PTXVersion", "PTX_VERSION_2_1",
|
||||
"Use PTX Language Version 2.1",
|
||||
[FeaturePTX20]>;
|
||||
|
||||
//===- PTX Shader Model ---------------------------------------------------===//
|
||||
|
||||
def FeatureSM10 : SubtargetFeature<"sm10", "PTXShaderModel", "PTX_SM_1_0",
|
||||
"Enable Shader Model 1.0 compliance">;
|
||||
def FeatureSM13 : SubtargetFeature<"sm13", "PTXShaderModel", "PTX_SM_1_3",
|
||||
"Enable Shader Model 1.3 compliance",
|
||||
[FeatureSM10, FeatureDouble]>;
|
||||
def FeatureSM20 : SubtargetFeature<"sm20", "PTXShaderModel", "PTX_SM_2_0",
|
||||
"Enable Shader Model 2.0 compliance",
|
||||
[FeatureSM13]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// PTX supported processors.
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/ADT/Twine.h"
|
||||
#include "llvm/CodeGen/AsmPrinter.h"
|
||||
#include "llvm/CodeGen/MachineInstr.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/MC/MCStreamer.h"
|
||||
#include "llvm/MC/MCSymbol.h"
|
||||
#include "llvm/Target/Mangler.h"
|
||||
@ -37,13 +38,6 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<std::string>
|
||||
OptPTXVersion("ptx-version", cl::desc("Set PTX version"), cl::init("1.4"));
|
||||
|
||||
static cl::opt<std::string>
|
||||
OptPTXTarget("ptx-target", cl::desc("Set GPU target (comma-separated list)"),
|
||||
cl::init("sm_10"));
|
||||
|
||||
namespace {
|
||||
class PTXAsmPrinter : public AsmPrinter {
|
||||
public:
|
||||
@ -82,11 +76,14 @@ private:
|
||||
static const char PARAM_PREFIX[] = "__param_";
|
||||
|
||||
static const char *getRegisterTypeName(unsigned RegNo) {
|
||||
#define TEST_REGCLS(cls, clsstr) \
|
||||
#define TEST_REGCLS(cls, clsstr) \
|
||||
if (PTX::cls ## RegisterClass->contains(RegNo)) return # clsstr;
|
||||
TEST_REGCLS(RRegf32, f32);
|
||||
TEST_REGCLS(RRegs32, s32);
|
||||
TEST_REGCLS(Preds, pred);
|
||||
TEST_REGCLS(RRegu16, u16);
|
||||
TEST_REGCLS(RRegu32, u32);
|
||||
TEST_REGCLS(RRegu64, u64);
|
||||
TEST_REGCLS(RRegf32, f32);
|
||||
TEST_REGCLS(RRegf64, f64);
|
||||
#undef TEST_REGCLS
|
||||
|
||||
llvm_unreachable("Not in any register class!");
|
||||
@ -121,7 +118,14 @@ static const char *getTypeName(const Type* type) {
|
||||
switch (type->getTypeID()) {
|
||||
default: llvm_unreachable("Unknown type");
|
||||
case Type::FloatTyID: return ".f32";
|
||||
case Type::IntegerTyID: return ".s32"; // TODO: Handle 64-bit types.
|
||||
case Type::DoubleTyID: return ".f64";
|
||||
case Type::IntegerTyID:
|
||||
switch (type->getPrimitiveSizeInBits()) {
|
||||
default: llvm_unreachable("Unknown integer bit-width");
|
||||
case 16: return ".u16";
|
||||
case 32: return ".u32";
|
||||
case 64: return ".u64";
|
||||
}
|
||||
case Type::ArrayTyID:
|
||||
case Type::PointerTyID:
|
||||
type = dyn_cast<const SequentialType>(type)->getElementType();
|
||||
@ -162,8 +166,11 @@ bool PTXAsmPrinter::doFinalization(Module &M) {
|
||||
|
||||
void PTXAsmPrinter::EmitStartOfAsmFile(Module &M)
|
||||
{
|
||||
OutStreamer.EmitRawText(Twine("\t.version " + OptPTXVersion));
|
||||
OutStreamer.EmitRawText(Twine("\t.target " + OptPTXTarget));
|
||||
const PTXSubtarget& ST = TM.getSubtarget<PTXSubtarget>();
|
||||
|
||||
OutStreamer.EmitRawText(Twine("\t.version " + ST.getPTXVersionString()));
|
||||
OutStreamer.EmitRawText(Twine("\t.target " + ST.getTargetString() +
|
||||
(ST.supportsDouble() ? "" : ", map_f64_to_f32")));
|
||||
OutStreamer.AddBlankLine();
|
||||
|
||||
// declare global variables
|
||||
@ -236,11 +243,24 @@ void PTXAsmPrinter::printOperand(const MachineInstr *MI, int opNum,
|
||||
break;
|
||||
case MachineOperand::MO_FPImmediate:
|
||||
APInt constFP = MO.getFPImm()->getValueAPF().bitcastToAPInt();
|
||||
if (constFP.getZExtValue() > 0) {
|
||||
OS << "0F" << constFP.toString(16, false);
|
||||
bool isFloat = MO.getFPImm()->getType()->getTypeID() == Type::FloatTyID;
|
||||
// Emit 0F for 32-bit floats and 0D for 64-bit doubles.
|
||||
if (isFloat) {
|
||||
OS << "0F";
|
||||
}
|
||||
else {
|
||||
OS << "0F00000000";
|
||||
OS << "0D";
|
||||
}
|
||||
// Emit the encoded floating-point value.
|
||||
if (constFP.getZExtValue() > 0) {
|
||||
OS << constFP.toString(16, false);
|
||||
}
|
||||
else {
|
||||
OS << "00000000";
|
||||
// If we have a double-precision zero, pad to 8 bytes.
|
||||
if (!isFloat) {
|
||||
OS << "00000000";
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
@ -338,12 +358,18 @@ void PTXAsmPrinter::EmitFunctionDeclaration() {
|
||||
if (!MFI->argRegEmpty()) {
|
||||
decl += " (";
|
||||
if (isKernel) {
|
||||
for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
|
||||
if (i != 0)
|
||||
unsigned cnt = 0;
|
||||
//for (int i = 0, e = MFI->getNumArg(); i != e; ++i) {
|
||||
for(PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->argRegBegin(), e = MFI->argRegEnd(), b = i; i != e; ++i) {
|
||||
reg = *i;
|
||||
assert(reg != PTX::NoRegister && "Not a valid register!");
|
||||
if (i != b)
|
||||
decl += ", ";
|
||||
decl += ".param .s32 "; // TODO: add types
|
||||
decl += ".param .u32"; // TODO: Parse type from register map
|
||||
decl += " ";
|
||||
decl += PARAM_PREFIX;
|
||||
decl += utostr(i + 1);
|
||||
decl += utostr(++cnt);
|
||||
}
|
||||
} else {
|
||||
for (PTXMachineFunctionInfo::reg_iterator
|
||||
|
@ -15,6 +15,7 @@
|
||||
#include "PTXTargetMachine.h"
|
||||
#include "llvm/CodeGen/SelectionDAGISel.h"
|
||||
#include "llvm/DerivedTypes.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -66,14 +67,34 @@ SDNode *PTXDAGToDAGISel::Select(SDNode *Node) {
|
||||
}
|
||||
|
||||
SDNode *PTXDAGToDAGISel::SelectREAD_PARAM(SDNode *Node) {
|
||||
SDValue index = Node->getOperand(1);
|
||||
DebugLoc dl = Node->getDebugLoc();
|
||||
SDValue index = Node->getOperand(1);
|
||||
DebugLoc dl = Node->getDebugLoc();
|
||||
unsigned opcode;
|
||||
|
||||
if (index.getOpcode() != ISD::TargetConstant)
|
||||
llvm_unreachable("READ_PARAM: index is not ISD::TargetConstant");
|
||||
|
||||
if (Node->getValueType(0) == MVT::i16) {
|
||||
opcode = PTX::LDpiU16;
|
||||
}
|
||||
else if (Node->getValueType(0) == MVT::i32) {
|
||||
opcode = PTX::LDpiU32;
|
||||
}
|
||||
else if (Node->getValueType(0) == MVT::i64) {
|
||||
opcode = PTX::LDpiU64;
|
||||
}
|
||||
else if (Node->getValueType(0) == MVT::f32) {
|
||||
opcode = PTX::LDpiF32;
|
||||
}
|
||||
else if (Node->getValueType(0) == MVT::f64) {
|
||||
opcode = PTX::LDpiF64;
|
||||
}
|
||||
else {
|
||||
llvm_unreachable("Unknown parameter type for ld.param");
|
||||
}
|
||||
|
||||
return PTXInstrInfo::
|
||||
GetPTXMachineNode(CurDAG, PTX::LDpi, dl, MVT::i32, index);
|
||||
GetPTXMachineNode(CurDAG, opcode, dl, Node->getValueType(0), index);
|
||||
}
|
||||
|
||||
// Match memory operand of the form [reg+reg]
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/CodeGen/SelectionDAG.h"
|
||||
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
@ -27,13 +28,17 @@ PTXTargetLowering::PTXTargetLowering(TargetMachine &TM)
|
||||
: TargetLowering(TM, new TargetLoweringObjectFileELF()) {
|
||||
// Set up the register classes.
|
||||
addRegisterClass(MVT::i1, PTX::PredsRegisterClass);
|
||||
addRegisterClass(MVT::i32, PTX::RRegs32RegisterClass);
|
||||
addRegisterClass(MVT::i16, PTX::RRegu16RegisterClass);
|
||||
addRegisterClass(MVT::i32, PTX::RRegu32RegisterClass);
|
||||
addRegisterClass(MVT::i64, PTX::RRegu64RegisterClass);
|
||||
addRegisterClass(MVT::f32, PTX::RRegf32RegisterClass);
|
||||
|
||||
addRegisterClass(MVT::f64, PTX::RRegf64RegisterClass);
|
||||
|
||||
setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand);
|
||||
|
||||
setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
|
||||
|
||||
setOperationAction(ISD::ConstantFP, MVT::f64, Legal);
|
||||
|
||||
// Customize translation of memory addresses
|
||||
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
|
||||
|
||||
@ -90,10 +95,13 @@ struct argmap_entry {
|
||||
bool operator==(MVT::SimpleValueType _VT) const { return VT == _VT; }
|
||||
} argmap[] = {
|
||||
argmap_entry(MVT::i1, PTX::PredsRegisterClass),
|
||||
argmap_entry(MVT::i32, PTX::RRegs32RegisterClass),
|
||||
argmap_entry(MVT::f32, PTX::RRegf32RegisterClass)
|
||||
argmap_entry(MVT::i16, PTX::RRegu16RegisterClass),
|
||||
argmap_entry(MVT::i32, PTX::RRegu32RegisterClass),
|
||||
argmap_entry(MVT::i64, PTX::RRegu64RegisterClass),
|
||||
argmap_entry(MVT::f32, PTX::RRegf32RegisterClass),
|
||||
argmap_entry(MVT::f64, PTX::RRegf64RegisterClass)
|
||||
};
|
||||
} // end anonymous namespace
|
||||
} // end anonymous namespace
|
||||
|
||||
SDValue PTXTargetLowering::
|
||||
LowerFormalArguments(SDValue Chain,
|
||||
@ -192,12 +200,21 @@ SDValue PTXTargetLowering::
|
||||
SDValue Flag;
|
||||
unsigned reg;
|
||||
|
||||
if (Outs[0].VT == MVT::i32) {
|
||||
if (Outs[0].VT == MVT::i16) {
|
||||
reg = PTX::RH0;
|
||||
}
|
||||
else if (Outs[0].VT == MVT::i32) {
|
||||
reg = PTX::R0;
|
||||
}
|
||||
else if (Outs[0].VT == MVT::i64) {
|
||||
reg = PTX::RD0;
|
||||
}
|
||||
else if (Outs[0].VT == MVT::f32) {
|
||||
reg = PTX::F0;
|
||||
}
|
||||
else if (Outs[0].VT == MVT::f64) {
|
||||
reg = PTX::FD0;
|
||||
}
|
||||
else {
|
||||
assert(false && "Can return only basic types");
|
||||
}
|
||||
|
@ -27,9 +27,12 @@ static const struct map_entry {
|
||||
const TargetRegisterClass *cls;
|
||||
const int opcode;
|
||||
} map[] = {
|
||||
{ &PTX::RRegs32RegClass, PTX::MOVrr },
|
||||
{ &PTX::RRegf32RegClass, PTX::MOVrr },
|
||||
{ &PTX::PredsRegClass, PTX::MOVpp }
|
||||
{ &PTX::RRegu16RegClass, PTX::MOVU16rr },
|
||||
{ &PTX::RRegu32RegClass, PTX::MOVU32rr },
|
||||
{ &PTX::RRegu64RegClass, PTX::MOVU64rr },
|
||||
{ &PTX::RRegf32RegClass, PTX::MOVF32rr },
|
||||
{ &PTX::RRegf64RegClass, PTX::MOVF64rr },
|
||||
{ &PTX::PredsRegClass, PTX::MOVPREDrr }
|
||||
};
|
||||
|
||||
void PTXInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
@ -76,8 +79,12 @@ bool PTXInstrInfo::isMoveInstr(const MachineInstr& MI,
|
||||
switch (MI.getOpcode()) {
|
||||
default:
|
||||
return false;
|
||||
case PTX::MOVpp:
|
||||
case PTX::MOVrr:
|
||||
case PTX::MOVU16rr:
|
||||
case PTX::MOVU32rr:
|
||||
case PTX::MOVU64rr:
|
||||
case PTX::MOVF32rr:
|
||||
case PTX::MOVF64rr:
|
||||
case PTX::MOVPREDrr:
|
||||
assert(MI.getNumOperands() >= 2 &&
|
||||
MI.getOperand(0).isReg() && MI.getOperand(1).isReg() &&
|
||||
"Invalid register-register move instruction");
|
||||
|
@ -114,7 +114,7 @@ def ADDRii : ComplexPattern<i32, 2, "SelectADDRii", [], []>;
|
||||
// Address operands
|
||||
def MEMri : Operand<i32> {
|
||||
let PrintMethod = "printMemOperand";
|
||||
let MIOperandInfo = (ops RRegs32, i32imm);
|
||||
let MIOperandInfo = (ops RRegu32, i32imm);
|
||||
}
|
||||
def MEMii : Operand<i32> {
|
||||
let PrintMethod = "printMemOperand";
|
||||
@ -143,75 +143,115 @@ def PTXret
|
||||
// Instruction Class Templates
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Three-operand f32 instruction template
|
||||
// Three-operand floating-point instruction template
|
||||
multiclass FLOAT3<string opcstr, SDNode opnode> {
|
||||
def rr : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b),
|
||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
||||
[(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
|
||||
def ri : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, f32imm:$b),
|
||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
||||
[(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
|
||||
def rr32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, RRegf32:$b),
|
||||
!strconcat(opcstr, ".f32\t$d, $a, $b"),
|
||||
[(set RRegf32:$d, (opnode RRegf32:$a, RRegf32:$b))]>;
|
||||
def ri32 : InstPTX<(outs RRegf32:$d),
|
||||
(ins RRegf32:$a, f32imm:$b),
|
||||
!strconcat(opcstr, ".f32\t$d, $a, $b"),
|
||||
[(set RRegf32:$d, (opnode RRegf32:$a, fpimm:$b))]>;
|
||||
def rr64 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, RRegf64:$b),
|
||||
!strconcat(opcstr, ".f64\t$d, $a, $b"),
|
||||
[(set RRegf64:$d, (opnode RRegf64:$a, RRegf64:$b))]>;
|
||||
def ri64 : InstPTX<(outs RRegf64:$d),
|
||||
(ins RRegf64:$a, f64imm:$b),
|
||||
!strconcat(opcstr, ".f64\t$d, $a, $b"),
|
||||
[(set RRegf64:$d, (opnode RRegf64:$a, fpimm:$b))]>;
|
||||
}
|
||||
|
||||
multiclass INT3<string opcstr, SDNode opnode> {
|
||||
def rr : InstPTX<(outs RRegs32:$d),
|
||||
(ins RRegs32:$a, RRegs32:$b),
|
||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
||||
[(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
|
||||
def ri : InstPTX<(outs RRegs32:$d),
|
||||
(ins RRegs32:$a, i32imm:$b),
|
||||
!strconcat(opcstr, ".%type\t$d, $a, $b"),
|
||||
[(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
|
||||
def rr16 : InstPTX<(outs RRegu16:$d),
|
||||
(ins RRegu16:$a, RRegu16:$b),
|
||||
!strconcat(opcstr, ".u16\t$d, $a, $b"),
|
||||
[(set RRegu16:$d, (opnode RRegu16:$a, RRegu16:$b))]>;
|
||||
def ri16 : InstPTX<(outs RRegu16:$d),
|
||||
(ins RRegu16:$a, i16imm:$b),
|
||||
!strconcat(opcstr, ".u16\t$d, $a, $b"),
|
||||
[(set RRegu16:$d, (opnode RRegu16:$a, imm:$b))]>;
|
||||
def rr32 : InstPTX<(outs RRegu32:$d),
|
||||
(ins RRegu32:$a, RRegu32:$b),
|
||||
!strconcat(opcstr, ".u32\t$d, $a, $b"),
|
||||
[(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
|
||||
def ri32 : InstPTX<(outs RRegu32:$d),
|
||||
(ins RRegu32:$a, i32imm:$b),
|
||||
!strconcat(opcstr, ".u32\t$d, $a, $b"),
|
||||
[(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
|
||||
def rr64 : InstPTX<(outs RRegu64:$d),
|
||||
(ins RRegu64:$a, RRegu64:$b),
|
||||
!strconcat(opcstr, ".u64\t$d, $a, $b"),
|
||||
[(set RRegu64:$d, (opnode RRegu64:$a, RRegu64:$b))]>;
|
||||
def ri64 : InstPTX<(outs RRegu64:$d),
|
||||
(ins RRegu64:$a, i64imm:$b),
|
||||
!strconcat(opcstr, ".u64\t$d, $a, $b"),
|
||||
[(set RRegu64:$d, (opnode RRegu64:$a, imm:$b))]>;
|
||||
}
|
||||
|
||||
// no %type directive, non-commutable
|
||||
multiclass INT3ntnc<string opcstr, SDNode opnode> {
|
||||
def rr : InstPTX<(outs RRegs32:$d),
|
||||
(ins RRegs32:$a, RRegs32:$b),
|
||||
def rr : InstPTX<(outs RRegu32:$d),
|
||||
(ins RRegu32:$a, RRegu32:$b),
|
||||
!strconcat(opcstr, "\t$d, $a, $b"),
|
||||
[(set RRegs32:$d, (opnode RRegs32:$a, RRegs32:$b))]>;
|
||||
def ri : InstPTX<(outs RRegs32:$d),
|
||||
(ins RRegs32:$a, i32imm:$b),
|
||||
[(set RRegu32:$d, (opnode RRegu32:$a, RRegu32:$b))]>;
|
||||
def ri : InstPTX<(outs RRegu32:$d),
|
||||
(ins RRegu32:$a, i32imm:$b),
|
||||
!strconcat(opcstr, "\t$d, $a, $b"),
|
||||
[(set RRegs32:$d, (opnode RRegs32:$a, imm:$b))]>;
|
||||
def ir : InstPTX<(outs RRegs32:$d),
|
||||
(ins i32imm:$a, RRegs32:$b),
|
||||
[(set RRegu32:$d, (opnode RRegu32:$a, imm:$b))]>;
|
||||
def ir : InstPTX<(outs RRegu32:$d),
|
||||
(ins i32imm:$a, RRegu32:$b),
|
||||
!strconcat(opcstr, "\t$d, $a, $b"),
|
||||
[(set RRegs32:$d, (opnode imm:$a, RRegs32:$b))]>;
|
||||
[(set RRegu32:$d, (opnode imm:$a, RRegu32:$b))]>;
|
||||
}
|
||||
|
||||
multiclass PTX_LD<string opstr, RegisterClass RC, PatFrag pat_load> {
|
||||
multiclass PTX_LD<string opstr, string typestr, RegisterClass RC, PatFrag pat_load> {
|
||||
def rr : InstPTX<(outs RC:$d),
|
||||
(ins MEMri:$a),
|
||||
!strconcat(opstr, ".%type\t$d, [$a]"),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||
[(set RC:$d, (pat_load ADDRrr:$a))]>;
|
||||
def ri : InstPTX<(outs RC:$d),
|
||||
(ins MEMri:$a),
|
||||
!strconcat(opstr, ".%type\t$d, [$a]"),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||
[(set RC:$d, (pat_load ADDRri:$a))]>;
|
||||
def ii : InstPTX<(outs RC:$d),
|
||||
(ins MEMii:$a),
|
||||
!strconcat(opstr, ".%type\t$d, [$a]"),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t$d, [$a]")),
|
||||
[(set RC:$d, (pat_load ADDRii:$a))]>;
|
||||
}
|
||||
|
||||
multiclass PTX_ST<string opstr, RegisterClass RC, PatFrag pat_store> {
|
||||
multiclass PTX_LD_ALL<string opstr, PatFrag pat_load> {
|
||||
defm u16 : PTX_LD<opstr, ".u16", RRegu16, pat_load>;
|
||||
defm u32 : PTX_LD<opstr, ".u32", RRegu32, pat_load>;
|
||||
defm u64 : PTX_LD<opstr, ".u64", RRegu64, pat_load>;
|
||||
defm f32 : PTX_LD<opstr, ".f32", RRegf32, pat_load>;
|
||||
defm f64 : PTX_LD<opstr, ".f64", RRegf64, pat_load>;
|
||||
}
|
||||
|
||||
multiclass PTX_ST<string opstr, string typestr, RegisterClass RC, PatFrag pat_store> {
|
||||
def rr : InstPTX<(outs),
|
||||
(ins RC:$d, MEMri:$a),
|
||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||
[(pat_store RC:$d, ADDRrr:$a)]>;
|
||||
def ri : InstPTX<(outs),
|
||||
(ins RC:$d, MEMri:$a),
|
||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||
[(pat_store RC:$d, ADDRri:$a)]>;
|
||||
def ii : InstPTX<(outs),
|
||||
(ins RC:$d, MEMii:$a),
|
||||
!strconcat(opstr, ".%type\t[$a], $d"),
|
||||
!strconcat(opstr, !strconcat(typestr, "\t[$a], $d")),
|
||||
[(pat_store RC:$d, ADDRii:$a)]>;
|
||||
}
|
||||
|
||||
multiclass PTX_ST_ALL<string opstr, PatFrag pat_store> {
|
||||
defm u16 : PTX_ST<opstr, ".u16", RRegu16, pat_store>;
|
||||
defm u32 : PTX_ST<opstr, ".u32", RRegu32, pat_store>;
|
||||
defm u64 : PTX_ST<opstr, ".u64", RRegu64, pat_store>;
|
||||
defm f32 : PTX_ST<opstr, ".f32", RRegf32, pat_store>;
|
||||
defm f64 : PTX_ST<opstr, ".f64", RRegf64, pat_store>;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
@ -236,60 +276,67 @@ defm SRA : INT3ntnc<"shr.s32", PTXsra>;
|
||||
///===- Data Movement and Conversion Instructions -------------------------===//
|
||||
|
||||
let neverHasSideEffects = 1 in {
|
||||
// rely on isMoveInstr to separate MOVpp, MOVrr, etc.
|
||||
def MOVpp
|
||||
def MOVPREDrr
|
||||
: InstPTX<(outs Preds:$d), (ins Preds:$a), "mov.pred\t$d, $a", []>;
|
||||
def MOVrr
|
||||
: InstPTX<(outs RRegs32:$d), (ins RRegs32:$a), "mov.%type\t$d, $a", []>;
|
||||
def FMOVrr
|
||||
def MOVU16rr
|
||||
: InstPTX<(outs RRegu16:$d), (ins RRegu16:$a), "mov.u16\t$d, $a", []>;
|
||||
def MOVU32rr
|
||||
: InstPTX<(outs RRegu32:$d), (ins RRegu32:$a), "mov.u32\t$d, $a", []>;
|
||||
def MOVU64rr
|
||||
: InstPTX<(outs RRegu64:$d), (ins RRegu64:$a), "mov.u64\t$d, $a", []>;
|
||||
def MOVF32rr
|
||||
: InstPTX<(outs RRegf32:$d), (ins RRegf32:$a), "mov.f32\t$d, $a", []>;
|
||||
def MOVF64rr
|
||||
: InstPTX<(outs RRegf64:$d), (ins RRegf64:$a), "mov.f64\t$d, $a", []>;
|
||||
}
|
||||
|
||||
let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
|
||||
def MOVpi
|
||||
def MOVPREDri
|
||||
: InstPTX<(outs Preds:$d), (ins i1imm:$a), "mov.pred\t$d, $a",
|
||||
[(set Preds:$d, imm:$a)]>;
|
||||
def MOVri
|
||||
: InstPTX<(outs RRegs32:$d), (ins i32imm:$a), "mov.s32\t$d, $a",
|
||||
[(set RRegs32:$d, imm:$a)]>;
|
||||
def FMOVri
|
||||
def MOVU16ri
|
||||
: InstPTX<(outs RRegu16:$d), (ins i16imm:$a), "mov.u16\t$d, $a",
|
||||
[(set RRegu16:$d, imm:$a)]>;
|
||||
def MOVU32ri
|
||||
: InstPTX<(outs RRegu32:$d), (ins i32imm:$a), "mov.u32\t$d, $a",
|
||||
[(set RRegu32:$d, imm:$a)]>;
|
||||
def MOVU164ri
|
||||
: InstPTX<(outs RRegu64:$d), (ins i64imm:$a), "mov.u64\t$d, $a",
|
||||
[(set RRegu64:$d, imm:$a)]>;
|
||||
def MOVF32ri
|
||||
: InstPTX<(outs RRegf32:$d), (ins f32imm:$a), "mov.f32\t$d, $a",
|
||||
[(set RRegf32:$d, fpimm:$a)]>;
|
||||
def MOVF64ri
|
||||
: InstPTX<(outs RRegf64:$d), (ins f64imm:$a), "mov.f64\t$d, $a",
|
||||
[(set RRegf64:$d, fpimm:$a)]>;
|
||||
}
|
||||
|
||||
// Integer loads
|
||||
defm LDg : PTX_LD<"ld.global", RRegs32, load_global>;
|
||||
defm LDc : PTX_LD<"ld.const", RRegs32, load_constant>;
|
||||
defm LDl : PTX_LD<"ld.local", RRegs32, load_local>;
|
||||
defm LDp : PTX_LD<"ld.param", RRegs32, load_parameter>;
|
||||
defm LDs : PTX_LD<"ld.shared", RRegs32, load_shared>;
|
||||
// Loads
|
||||
defm LDg : PTX_LD_ALL<"ld.global", load_global>;
|
||||
defm LDc : PTX_LD_ALL<"ld.const", load_constant>;
|
||||
defm LDl : PTX_LD_ALL<"ld.local", load_local>;
|
||||
defm LDs : PTX_LD_ALL<"ld.shared", load_shared>;
|
||||
|
||||
def LDpi : InstPTX<(outs RRegs32:$d), (ins MEMpi:$a),
|
||||
"ld.param.%type\t$d, [$a]", []>;
|
||||
// This is a special instruction that is manually inserted for kernel parameters
|
||||
def LDpiU16 : InstPTX<(outs RRegu16:$d), (ins MEMpi:$a),
|
||||
"ld.param.u16\t$d, [$a]", []>;
|
||||
def LDpiU32 : InstPTX<(outs RRegu32:$d), (ins MEMpi:$a),
|
||||
"ld.param.u32\t$d, [$a]", []>;
|
||||
def LDpiU64 : InstPTX<(outs RRegu64:$d), (ins MEMpi:$a),
|
||||
"ld.param.u64\t$d, [$a]", []>;
|
||||
def LDpiF32 : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
|
||||
"ld.param.f32\t$d, [$a]", []>;
|
||||
def LDpiF64 : InstPTX<(outs RRegf64:$d), (ins MEMpi:$a),
|
||||
"ld.param.f64\t$d, [$a]", []>;
|
||||
|
||||
// Floating-point loads
|
||||
defm FLDg : PTX_LD<"ld.global", RRegf32, load_global>;
|
||||
defm FLDc : PTX_LD<"ld.const", RRegf32, load_constant>;
|
||||
defm FLDl : PTX_LD<"ld.local", RRegf32, load_local>;
|
||||
defm FLDp : PTX_LD<"ld.param", RRegf32, load_parameter>;
|
||||
defm FLDs : PTX_LD<"ld.shared", RRegf32, load_shared>;
|
||||
// Stores
|
||||
defm STg : PTX_ST_ALL<"st.global", store_global>;
|
||||
defm STl : PTX_ST_ALL<"st.local", store_local>;
|
||||
defm STs : PTX_ST_ALL<"st.shared", store_shared>;
|
||||
|
||||
def FLDpi : InstPTX<(outs RRegf32:$d), (ins MEMpi:$a),
|
||||
"ld.param.%type\t$d, [$a]", []>;
|
||||
|
||||
// Integer stores
|
||||
defm STg : PTX_ST<"st.global", RRegs32, store_global>;
|
||||
defm STl : PTX_ST<"st.local", RRegs32, store_local>;
|
||||
// Store to parameter state space requires PTX 2.0 or higher?
|
||||
// defm STp : PTX_ST<"st.param", RRegs32, store_parameter>;
|
||||
defm STs : PTX_ST<"st.shared", RRegs32, store_shared>;
|
||||
|
||||
// Floating-point stores
|
||||
defm FSTg : PTX_ST<"st.global", RRegf32, store_global>;
|
||||
defm FSTl : PTX_ST<"st.local", RRegf32, store_local>;
|
||||
// Store to parameter state space requires PTX 2.0 or higher?
|
||||
// defm FSTp : PTX_ST<"st.param", RRegf32, store_parameter>;
|
||||
defm FSTs : PTX_ST<"st.shared", RRegf32, store_shared>;
|
||||
// defm STp : PTX_ST_ALL<"st.param", store_parameter>;
|
||||
// defm LDp : PTX_LD_ALL<"ld.param", load_parameter>;
|
||||
// TODO: Do something with st.param if/when it is needed.
|
||||
|
||||
///===- Control Flow Instructions -----------------------------------------===//
|
||||
|
||||
|
@ -79,12 +79,12 @@ bool PTXMFInfoExtract::runOnMachineFunction(MachineFunction &MF) {
|
||||
|
||||
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->argRegBegin(), e = MFI->argRegEnd();
|
||||
i != e; ++i)
|
||||
i != e; ++i)
|
||||
dbgs() << "Arg Reg: " << *i << "\n";);
|
||||
|
||||
DEBUG(for (PTXMachineFunctionInfo::reg_iterator
|
||||
i = MFI->localVarRegBegin(), e = MFI->localVarRegEnd();
|
||||
i != e; ++i)
|
||||
i != e; ++i)
|
||||
dbgs() << "Local Var Reg: " << *i << "\n";);
|
||||
|
||||
return false;
|
||||
|
@ -19,6 +19,8 @@ class PTXReg<string n> : Register<n> {
|
||||
// Registers
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
///===- Predicate Registers -----------------------------------------------===//
|
||||
|
||||
def P0 : PTXReg<"p0">;
|
||||
def P1 : PTXReg<"p1">;
|
||||
def P2 : PTXReg<"p2">;
|
||||
@ -52,6 +54,43 @@ def P29 : PTXReg<"p29">;
|
||||
def P30 : PTXReg<"p30">;
|
||||
def P31 : PTXReg<"p31">;
|
||||
|
||||
///===- 16-bit Integer Registers ------------------------------------------===//
|
||||
|
||||
def RH0 : PTXReg<"rh0">;
|
||||
def RH1 : PTXReg<"rh1">;
|
||||
def RH2 : PTXReg<"rh2">;
|
||||
def RH3 : PTXReg<"rh3">;
|
||||
def RH4 : PTXReg<"rh4">;
|
||||
def RH5 : PTXReg<"rh5">;
|
||||
def RH6 : PTXReg<"rh6">;
|
||||
def RH7 : PTXReg<"rh7">;
|
||||
def RH8 : PTXReg<"rh8">;
|
||||
def RH9 : PTXReg<"rh9">;
|
||||
def RH10 : PTXReg<"rh10">;
|
||||
def RH11 : PTXReg<"rh11">;
|
||||
def RH12 : PTXReg<"rh12">;
|
||||
def RH13 : PTXReg<"rh13">;
|
||||
def RH14 : PTXReg<"rh14">;
|
||||
def RH15 : PTXReg<"rh15">;
|
||||
def RH16 : PTXReg<"rh16">;
|
||||
def RH17 : PTXReg<"rh17">;
|
||||
def RH18 : PTXReg<"rh18">;
|
||||
def RH19 : PTXReg<"rh19">;
|
||||
def RH20 : PTXReg<"rh20">;
|
||||
def RH21 : PTXReg<"rh21">;
|
||||
def RH22 : PTXReg<"rh22">;
|
||||
def RH23 : PTXReg<"rh23">;
|
||||
def RH24 : PTXReg<"rh24">;
|
||||
def RH25 : PTXReg<"rh25">;
|
||||
def RH26 : PTXReg<"rh26">;
|
||||
def RH27 : PTXReg<"rh27">;
|
||||
def RH28 : PTXReg<"rh28">;
|
||||
def RH29 : PTXReg<"rh29">;
|
||||
def RH30 : PTXReg<"rh30">;
|
||||
def RH31 : PTXReg<"rh31">;
|
||||
|
||||
///===- 32-bit Integer Registers ------------------------------------------===//
|
||||
|
||||
def R0 : PTXReg<"r0">;
|
||||
def R1 : PTXReg<"r1">;
|
||||
def R2 : PTXReg<"r2">;
|
||||
@ -85,6 +124,43 @@ def R29 : PTXReg<"r29">;
|
||||
def R30 : PTXReg<"r30">;
|
||||
def R31 : PTXReg<"r31">;
|
||||
|
||||
///===- 64-bit Integer Registers ------------------------------------------===//
|
||||
|
||||
def RD0 : PTXReg<"rd0">;
|
||||
def RD1 : PTXReg<"rd1">;
|
||||
def RD2 : PTXReg<"rd2">;
|
||||
def RD3 : PTXReg<"rd3">;
|
||||
def RD4 : PTXReg<"rd4">;
|
||||
def RD5 : PTXReg<"rd5">;
|
||||
def RD6 : PTXReg<"rd6">;
|
||||
def RD7 : PTXReg<"rd7">;
|
||||
def RD8 : PTXReg<"rd8">;
|
||||
def RD9 : PTXReg<"rd9">;
|
||||
def RD10 : PTXReg<"rd10">;
|
||||
def RD11 : PTXReg<"rd11">;
|
||||
def RD12 : PTXReg<"rd12">;
|
||||
def RD13 : PTXReg<"rd13">;
|
||||
def RD14 : PTXReg<"rd14">;
|
||||
def RD15 : PTXReg<"rd15">;
|
||||
def RD16 : PTXReg<"rd16">;
|
||||
def RD17 : PTXReg<"rd17">;
|
||||
def RD18 : PTXReg<"rd18">;
|
||||
def RD19 : PTXReg<"rd19">;
|
||||
def RD20 : PTXReg<"rd20">;
|
||||
def RD21 : PTXReg<"rd21">;
|
||||
def RD22 : PTXReg<"rd22">;
|
||||
def RD23 : PTXReg<"rd23">;
|
||||
def RD24 : PTXReg<"rd24">;
|
||||
def RD25 : PTXReg<"rd25">;
|
||||
def RD26 : PTXReg<"rd26">;
|
||||
def RD27 : PTXReg<"rd27">;
|
||||
def RD28 : PTXReg<"rd28">;
|
||||
def RD29 : PTXReg<"rd29">;
|
||||
def RD30 : PTXReg<"rd30">;
|
||||
def RD31 : PTXReg<"rd31">;
|
||||
|
||||
///===- 32-bit Floating-Point Registers -----------------------------------===//
|
||||
|
||||
def F0 : PTXReg<"f0">;
|
||||
def F1 : PTXReg<"f1">;
|
||||
def F2 : PTXReg<"f2">;
|
||||
@ -118,6 +194,41 @@ def F29 : PTXReg<"f29">;
|
||||
def F30 : PTXReg<"f30">;
|
||||
def F31 : PTXReg<"f31">;
|
||||
|
||||
///===- 64-bit Floating-Point Registers -----------------------------------===//
|
||||
|
||||
def FD0 : PTXReg<"fd0">;
|
||||
def FD1 : PTXReg<"fd1">;
|
||||
def FD2 : PTXReg<"fd2">;
|
||||
def FD3 : PTXReg<"fd3">;
|
||||
def FD4 : PTXReg<"fd4">;
|
||||
def FD5 : PTXReg<"fd5">;
|
||||
def FD6 : PTXReg<"fd6">;
|
||||
def FD7 : PTXReg<"fd7">;
|
||||
def FD8 : PTXReg<"fd8">;
|
||||
def FD9 : PTXReg<"fd9">;
|
||||
def FD10 : PTXReg<"fd10">;
|
||||
def FD11 : PTXReg<"fd11">;
|
||||
def FD12 : PTXReg<"fd12">;
|
||||
def FD13 : PTXReg<"fd13">;
|
||||
def FD14 : PTXReg<"fd14">;
|
||||
def FD15 : PTXReg<"fd15">;
|
||||
def FD16 : PTXReg<"fd16">;
|
||||
def FD17 : PTXReg<"fd17">;
|
||||
def FD18 : PTXReg<"fd18">;
|
||||
def FD19 : PTXReg<"fd19">;
|
||||
def FD20 : PTXReg<"fd20">;
|
||||
def FD21 : PTXReg<"fd21">;
|
||||
def FD22 : PTXReg<"fd22">;
|
||||
def FD23 : PTXReg<"fd23">;
|
||||
def FD24 : PTXReg<"fd24">;
|
||||
def FD25 : PTXReg<"fd25">;
|
||||
def FD26 : PTXReg<"fd26">;
|
||||
def FD27 : PTXReg<"fd27">;
|
||||
def FD28 : PTXReg<"fd28">;
|
||||
def FD29 : PTXReg<"fd29">;
|
||||
def FD30 : PTXReg<"fd30">;
|
||||
def FD31 : PTXReg<"fd31">;
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Register classes
|
||||
@ -129,14 +240,32 @@ def Preds : RegisterClass<"PTX", [i1], 8,
|
||||
P16, P17, P18, P19, P20, P21, P22, P23,
|
||||
P24, P25, P26, P27, P28, P29, P30, P31]>;
|
||||
|
||||
def RRegs32 : RegisterClass<"PTX", [i32], 32,
|
||||
def RRegu16 : RegisterClass<"PTX", [i16], 16,
|
||||
[RH0, RH1, RH2, RH3, RH4, RH5, RH6, RH7,
|
||||
RH8, RH9, RH10, RH11, RH12, RH13, RH14, RH15,
|
||||
RH16, RH17, RH18, RH19, RH20, RH21, RH22, RH23,
|
||||
RH24, RH25, RH26, RH27, RH28, RH29, RH30, RH31]>;
|
||||
|
||||
def RRegu32 : RegisterClass<"PTX", [i32], 32,
|
||||
[R0, R1, R2, R3, R4, R5, R6, R7,
|
||||
R8, R9, R10, R11, R12, R13, R14, R15,
|
||||
R16, R17, R18, R19, R20, R21, R22, R23,
|
||||
R24, R25, R26, R27, R28, R29, R30, R31]>;
|
||||
|
||||
def RRegu64 : RegisterClass<"PTX", [i64], 64,
|
||||
[RD0, RD1, RD2, RD3, RD4, RD5, RD6, RD7,
|
||||
RD8, RD9, RD10, RD11, RD12, RD13, RD14, RD15,
|
||||
RD16, RD17, RD18, RD19, RD20, RD21, RD22, RD23,
|
||||
RD24, RD25, RD26, RD27, RD28, RD29, RD30, RD31]>;
|
||||
|
||||
def RRegf32 : RegisterClass<"PTX", [f32], 32,
|
||||
[F0, F1, F2, F3, F4, F5, F6, F7,
|
||||
F8, F9, F10, F11, F12, F13, F14, F15,
|
||||
F16, F17, F18, F19, F20, F21, F22, F23,
|
||||
F24, F25, F26, F27, F28, F29, F30, F31]>;
|
||||
|
||||
def RRegf64 : RegisterClass<"PTX", [f64], 64,
|
||||
[FD0, FD1, FD2, FD3, FD4, FD5, FD6, FD7,
|
||||
FD8, FD9, FD10, FD11, FD12, FD13, FD14, FD15,
|
||||
FD16, FD17, FD18, FD19, FD20, FD21, FD22, FD23,
|
||||
FD24, FD25, FD26, FD27, FD28, FD29, FD30, FD31]>;
|
||||
|
@ -12,12 +12,33 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "PTXSubtarget.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS) {
|
||||
std::string TARGET = "sm_20";
|
||||
// TODO: call ParseSubtargetFeatures(FS, TARGET);
|
||||
/// Construct the subtarget with its defaults (Shader Model 1.0, PTX 1.4,
/// no native .f64) and then let the feature string \p FS override them via
/// ParseSubtargetFeatures. \p TT is accepted but not read here.
PTXSubtarget::PTXSubtarget(const std::string &TT, const std::string &FS)
  : PTXShaderModel(PTX_SM_1_0),
    PTXVersion(PTX_VERSION_1_4),
    // Give the flag a defined value up front: nothing else in this
    // constructor assigns it, and ParseSubtargetFeatures is not guaranteed
    // to, so supportsDouble() could otherwise read an indeterminate bool.
    SupportsDouble(false) {
  // CPU name handed to the feature parser; no specific device is selected.
  std::string TARGET = "generic";
  ParseSubtargetFeatures(FS, TARGET);
}
|
||||
|
||||
std::string PTXSubtarget::getTargetString() const {
|
||||
switch(PTXShaderModel) {
|
||||
default: llvm_unreachable("Unknown shader model");
|
||||
case PTX_SM_1_0: return "sm_10";
|
||||
case PTX_SM_1_3: return "sm_13";
|
||||
case PTX_SM_2_0: return "sm_20";
|
||||
}
|
||||
}
|
||||
|
||||
std::string PTXSubtarget::getPTXVersionString() const {
|
||||
switch(PTXVersion) {
|
||||
default: llvm_unreachable("Unknown PTX version");
|
||||
case PTX_VERSION_1_4: return "1.4";
|
||||
case PTX_VERSION_2_0: return "2.0";
|
||||
case PTX_VERSION_2_1: return "2.1";
|
||||
}
|
||||
}
|
||||
|
||||
#include "PTXGenSubtarget.inc"
|
||||
|
@ -19,11 +19,36 @@
|
||||
namespace llvm {
|
||||
class PTXSubtarget : public TargetSubtarget {
|
||||
private:
|
||||
bool is_sm20;
|
||||
enum PTXShaderModelEnum {
|
||||
PTX_SM_1_0,
|
||||
PTX_SM_1_3,
|
||||
PTX_SM_2_0
|
||||
};
|
||||
|
||||
enum PTXVersionEnum {
|
||||
PTX_VERSION_1_4,
|
||||
PTX_VERSION_2_0,
|
||||
PTX_VERSION_2_1
|
||||
};
|
||||
|
||||
/// Shader Model supported on the target GPU.
|
||||
PTXShaderModelEnum PTXShaderModel;
|
||||
|
||||
/// PTX Language Version.
|
||||
PTXVersionEnum PTXVersion;
|
||||
|
||||
/// The native .f64 type is supported on the hardware.
|
||||
bool SupportsDouble;
|
||||
|
||||
public:
|
||||
PTXSubtarget(const std::string &TT, const std::string &FS);
|
||||
|
||||
std::string getTargetString() const;
|
||||
|
||||
std::string getPTXVersionString() const;
|
||||
|
||||
bool supportsDouble() const { return SupportsDouble; }
|
||||
|
||||
std::string ParseSubtargetFeatures(const std::string &FS,
|
||||
const std::string &CPU);
|
||||
}; // class PTXSubtarget
|
||||
|
@ -1,29 +1,71 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
define ptx_device i32 @t1(i32 %x, i32 %y) {
|
||||
; CHECK: add.s32 r0, r1, r2;
|
||||
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
|
||||
; CHECK: add.u16 rh0, rh1, rh2;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i16 %x, %y
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
|
||||
; CHECK: add.u32 r0, r1, r2;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i32 %x, %y
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: add.s32 r0, r1, 1;
|
||||
%z = add i32 %x, 1
|
||||
; CHECK: ret;
|
||||
ret i32 %z
|
||||
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
|
||||
; CHECK: add.u64 rd0, rd1, rd2;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i64 %x, %y
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t3(float %x, float %y) {
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: add.f32 f0, f1, f2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: add.f64 fd0, fd1, fd2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: add.u16 rh0, rh1, 1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i16 %x, 1
|
||||
ret i16 %z
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: add.u32 r0, r1, 1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i32 %x, 1
|
||||
ret i32 %z
|
||||
}
|
||||
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: add.u64 rd0, rd1, 1;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = add i64 %x, 1
|
||||
ret i64 %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: add.f32 f0, f1, 0F3F800000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd float %x, 1.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: add.f64 fd0, fd1, 0D3FF0000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fadd double %x, 1.0
|
||||
ret double %z
|
||||
}
|
||||
|
@ -1,78 +1,422 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .s32 array[];
|
||||
@array = external global [10 x i32]
|
||||
;CHECK: .extern .global .u16 array_i16[];
|
||||
@array_i16 = external global [10 x i16]
|
||||
|
||||
;CHECK: .extern .const .s32 array_constant[];
|
||||
@array_constant = external addrspace(1) constant [10 x i32]
|
||||
;CHECK: .extern .const .u16 array_constant_i16[];
|
||||
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||
|
||||
;CHECK: .extern .local .s32 array_local[];
|
||||
@array_local = external addrspace(2) global [10 x i32]
|
||||
;CHECK: .extern .local .u16 array_local_i16[];
|
||||
@array_local_i16 = external addrspace(2) global [10 x i16]
|
||||
|
||||
;CHECK: .extern .shared .s32 array_shared[];
|
||||
@array_shared = external addrspace(4) global [10 x i32]
|
||||
;CHECK: .extern .shared .u16 array_shared_i16[];
|
||||
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||
|
||||
define ptx_device i32 @t1(i32* %p) {
|
||||
;CHECK: .extern .global .u32 array_i32[];
|
||||
@array_i32 = external global [10 x i32]
|
||||
|
||||
;CHECK: .extern .const .u32 array_constant_i32[];
|
||||
@array_constant_i32 = external addrspace(1) constant [10 x i32]
|
||||
|
||||
;CHECK: .extern .local .u32 array_local_i32[];
|
||||
@array_local_i32 = external addrspace(2) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .shared .u32 array_shared_i32[];
|
||||
@array_shared_i32 = external addrspace(4) global [10 x i32]
|
||||
|
||||
;CHECK: .extern .global .u64 array_i64[];
|
||||
@array_i64 = external global [10 x i64]
|
||||
|
||||
;CHECK: .extern .const .u64 array_constant_i64[];
|
||||
@array_constant_i64 = external addrspace(1) constant [10 x i64]
|
||||
|
||||
;CHECK: .extern .local .u64 array_local_i64[];
|
||||
@array_local_i64 = external addrspace(2) global [10 x i64]
|
||||
|
||||
;CHECK: .extern .shared .u64 array_shared_i64[];
|
||||
@array_shared_i64 = external addrspace(4) global [10 x i64]
|
||||
|
||||
;CHECK: .extern .global .f32 array_float[];
|
||||
@array_float = external global [10 x float]
|
||||
|
||||
;CHECK: .extern .const .f32 array_constant_float[];
|
||||
@array_constant_float = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .local .f32 array_local_float[];
|
||||
@array_local_float = external addrspace(2) global [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .f32 array_shared_float[];
|
||||
@array_shared_float = external addrspace(4) global [10 x float]
|
||||
|
||||
;CHECK: .extern .global .f64 array_double[];
|
||||
@array_double = external global [10 x double]
|
||||
|
||||
;CHECK: .extern .const .f64 array_constant_double[];
|
||||
@array_constant_double = external addrspace(1) constant [10 x double]
|
||||
|
||||
;CHECK: .extern .local .f64 array_local_double[];
|
||||
@array_local_double = external addrspace(2) global [10 x double]
|
||||
|
||||
;CHECK: .extern .shared .f64 array_shared_double[];
|
||||
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
|
||||
define ptx_device i16 @t1_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.s32 r0, [r1];
|
||||
;CHECK: ld.global.u16 rh0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i16* %p
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32(i32* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 r0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i32* %p
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32* %p) {
|
||||
define ptx_device i64 @t1_u64(i64* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.s32 r0, [r1+4];
|
||||
;CHECK: ld.global.u64 rd0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load i64* %p
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load float* %p
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64(double* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 fd0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load double* %p
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 rh0, [r1+2];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i16* %p, i32 1
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 r0, [r1+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i32* %p, i32 1
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3(i32* %p, i32 %q) {
|
||||
define ptx_device i64 @t2_u64(i64* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.u64 rd0, [r1+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr i64* %p, i32 1
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [r1+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 fd0, [r1+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr double* %p, i32 1
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t3_u16(i16* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 1;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.u16 rh0, [r0];
|
||||
%i = getelementptr i16* %p, i32 %q
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t3_u32(i32* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK: add.s32 r0, r1, r0;
|
||||
;CHECK: ld.global.s32 r0, [r0];
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.u32 r0, [r0];
|
||||
%i = getelementptr i32* %p, i32 %q
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_global() {
|
||||
define ptx_device i64 @t3_u64(i64* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: ld.global.s32 r0, [array];
|
||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 0
|
||||
;CHECK: shl.b32 r0, r2, 3;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.u64 rd0, [r0];
|
||||
%i = getelementptr i64* %p, i32 %q
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3_f32(float* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.f32 f0, [r0];
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t3_f64(double* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 3;
|
||||
;CHECK-NEXT: add.u32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.f64 fd0, [r0];
|
||||
%i = getelementptr double* %p, i32 %q
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_global_u16() {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 rh0, [array_i16];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 0
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_global_u32() {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 r0, [array_i32];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_const() {
|
||||
define ptx_device i64 @t4_global_u64() {
|
||||
entry:
|
||||
;CHECK: ld.const.s32 r0, [array_constant];
|
||||
%i = getelementptr [10 x i32] addrspace(1)* @array_constant, i32 0, i32 0
|
||||
;CHECK: ld.global.u64 rd0, [array_i64];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_global_f32() {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [array_float];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 0
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_global_f64() {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 fd0, [array_double];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 0
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_const_u16() {
|
||||
entry:
|
||||
;CHECK: ld.const.u16 rh0, [array_constant_i16];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(1)* @array_constant_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(1)* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_const_u32() {
|
||||
entry:
|
||||
;CHECK: ld.const.u32 r0, [array_constant_i32];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(1)* @array_constant_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(1)* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_local() {
|
||||
define ptx_device i64 @t4_const_u64() {
|
||||
entry:
|
||||
;CHECK: ld.local.s32 r0, [array_local];
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local, i32 0, i32 0
|
||||
;CHECK: ld.const.u64 rd0, [array_constant_i64];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(1)* @array_constant_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(1)* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_const_f32() {
|
||||
entry:
|
||||
;CHECK: ld.const.f32 f0, [array_constant_float];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(1)* @array_constant_float, i32 0, i32 0
|
||||
%x = load float addrspace(1)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_const_f64() {
|
||||
entry:
|
||||
;CHECK: ld.const.f64 fd0, [array_constant_double];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(1)* @array_constant_double, i32 0, i32 0
|
||||
%x = load double addrspace(1)* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_local_u16() {
|
||||
entry:
|
||||
;CHECK: ld.local.u16 rh0, [array_local_i16];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(2)* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_local_u32() {
|
||||
entry:
|
||||
;CHECK: ld.local.u32 r0, [array_local_i32];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(2)* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_shared() {
|
||||
define ptx_device i64 @t4_local_u64() {
|
||||
entry:
|
||||
;CHECK: ld.shared.s32 r0, [array_shared];
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared, i32 0, i32 0
|
||||
;CHECK: ld.local.u64 rd0, [array_local_i64];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(2)* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_local_f32() {
|
||||
entry:
|
||||
;CHECK: ld.local.f32 f0, [array_local_float];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
|
||||
%x = load float addrspace(2)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_local_f64() {
|
||||
entry:
|
||||
;CHECK: ld.local.f64 fd0, [array_local_double];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
|
||||
%x = load double addrspace(2)* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t4_shared_u16() {
|
||||
entry:
|
||||
;CHECK: ld.shared.u16 rh0, [array_shared_i16];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
|
||||
%x = load i16 addrspace(4)* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t4_shared_u32() {
|
||||
entry:
|
||||
;CHECK: ld.shared.u32 r0, [array_shared_i32];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
|
||||
%x = load i32 addrspace(4)* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t5() {
|
||||
define ptx_device i64 @t4_shared_u64() {
|
||||
entry:
|
||||
;CHECK: ld.global.s32 r0, [array+4];
|
||||
%i = getelementptr [10 x i32]* @array, i32 0, i32 1
|
||||
;CHECK: ld.shared.u64 rd0, [array_shared_i64];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
|
||||
%x = load i64 addrspace(4)* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_shared_f32() {
|
||||
entry:
|
||||
;CHECK: ld.shared.f32 f0, [array_shared_float];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
|
||||
%x = load float addrspace(4)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t4_shared_f64() {
|
||||
entry:
|
||||
;CHECK: ld.shared.f64 fd0, [array_shared_double];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
|
||||
%x = load double addrspace(4)* %i
|
||||
ret double %x
|
||||
}
|
||||
|
||||
define ptx_device i16 @t5_u16() {
|
||||
entry:
|
||||
;CHECK: ld.global.u16 rh0, [array_i16+2];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
|
||||
%x = load i16* %i
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t5_u32() {
|
||||
entry:
|
||||
;CHECK: ld.global.u32 r0, [array_i32+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
|
||||
%x = load i32* %i
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @t5_u64() {
|
||||
entry:
|
||||
;CHECK: ld.global.u64 rd0, [array_i64+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
|
||||
%x = load i64* %i
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t5_f32() {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [array_float+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array_float, i32 0, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t5_f64() {
|
||||
entry:
|
||||
;CHECK: ld.global.f64 fd0, [array_double+8];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x double]* @array_double, i32 0, i32 1
|
||||
%x = load double* %i
|
||||
ret double %x
|
||||
}
|
||||
|
@ -1,86 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .f32 array[];
|
||||
@array = external global [10 x float]
|
||||
|
||||
;CHECK: .extern .const .f32 array_constant[];
|
||||
@array_constant = external addrspace(1) constant [10 x float]
|
||||
|
||||
;CHECK: .extern .local .f32 array_local[];
|
||||
@array_local = external addrspace(2) global [10 x float]
|
||||
|
||||
;CHECK: .extern .shared .f32 array_shared[];
|
||||
@array_shared = external addrspace(4) global [10 x float]
|
||||
|
||||
define ptx_device float @t1(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [r1];
|
||||
;CHECK-NEXT: ret;
|
||||
%x = load float* %p
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t2(float* %p) {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [r1+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3(float* %p, i32 %q) {
|
||||
entry:
|
||||
;CHECK: shl.b32 r0, r2, 2;
|
||||
;CHECK-NEXT: add.s32 r0, r1, r0;
|
||||
;CHECK-NEXT: ld.global.f32 f0, [r0];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr float* %p, i32 %q
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_global() {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [array];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array, i32 0, i32 0
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_const() {
|
||||
entry:
|
||||
;CHECK: ld.const.f32 f0, [array_constant];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(1)* @array_constant, i32 0, i32 0
|
||||
%x = load float addrspace(1)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_local() {
|
||||
entry:
|
||||
;CHECK: ld.local.f32 f0, [array_local];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
|
||||
%x = load float addrspace(2)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4_shared() {
|
||||
entry:
|
||||
;CHECK: ld.shared.f32 f0, [array_shared];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
|
||||
%x = load float addrspace(4)* %i
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device float @t5() {
|
||||
entry:
|
||||
;CHECK: ld.global.f32 f0, [array+4];
|
||||
;CHECK-NEXT: ret;
|
||||
%i = getelementptr [10 x float]* @array, i32 0, i32 1
|
||||
%x = load float* %i
|
||||
ret float %x
|
||||
}
|
@ -1,25 +1,62 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
define ptx_device i32 @t1() {
|
||||
; CHECK: mov.s32 r0, 0;
|
||||
define ptx_device i16 @t1_u16() {
|
||||
; CHECK: mov.u16 rh0, 0;
|
||||
; CHECK: ret;
|
||||
ret i16 0
|
||||
}
|
||||
|
||||
define ptx_device i32 @t1_u32() {
|
||||
; CHECK: mov.u32 r0, 0;
|
||||
; CHECK: ret;
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2(i32 %x) {
|
||||
; CHECK: mov.s32 r0, r1;
|
||||
define ptx_device i64 @t1_u64() {
|
||||
; CHECK: mov.u64 rd0, 0;
|
||||
; CHECK: ret;
|
||||
ret i64 0
|
||||
}
|
||||
|
||||
define ptx_device float @t1_f32() {
|
||||
; CHECK: mov.f32 f0, 0F00000000;
|
||||
; CHECK: ret;
|
||||
ret float 0.0
|
||||
}
|
||||
|
||||
define ptx_device double @t1_f64() {
|
||||
; CHECK: mov.f64 fd0, 0D0000000000000000;
|
||||
; CHECK: ret;
|
||||
ret double 0.0
|
||||
}
|
||||
|
||||
define ptx_device i16 @t2_u16(i16 %x) {
|
||||
; CHECK: mov.u16 rh0, rh1;
|
||||
; CHECK: ret;
|
||||
ret i16 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @t2_u32(i32 %x) {
|
||||
; CHECK: mov.u32 r0, r1;
|
||||
; CHECK: ret;
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t3() {
|
||||
; CHECK: mov.f32 f0, 0F00000000;
|
||||
; CHECK-NEXT: ret;
|
||||
ret float 0.0
|
||||
define ptx_device i64 @t2_u64(i64 %x) {
|
||||
; CHECK: mov.u64 rd0, rd1;
|
||||
; CHECK: ret;
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
define ptx_device float @t3_f32(float %x) {
|
||||
; CHECK: mov.f32 f0, f1;
|
||||
; CHECK-NEXT: ret;
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @t3_f64(double %x) {
|
||||
; CHECK: mov.f64 fd0, fd1;
|
||||
; CHECK-NEXT: ret;
|
||||
ret double %x
|
||||
}
|
||||
|
||||
|
@ -10,16 +10,30 @@
|
||||
; ret i32 %z
|
||||
;}
|
||||
|
||||
define ptx_device float @t3(float %x, float %y) {
|
||||
define ptx_device float @t1_f32(float %x, float %y) {
|
||||
; CHECK: mul.f32 f0, f1, f2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, %y
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device float @t4(float %x) {
|
||||
define ptx_device double @t1_f64(double %x, double %y) {
|
||||
; CHECK: mul.f64 fd0, fd1, fd2
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul double %x, %y
|
||||
ret double %z
|
||||
}
|
||||
|
||||
define ptx_device float @t2_f32(float %x) {
|
||||
; CHECK: mul.f32 f0, f1, 0F40A00000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul float %x, 5.0
|
||||
ret float %z
|
||||
}
|
||||
|
||||
define ptx_device double @t2_f64(double %x) {
|
||||
; CHECK: mul.f64 fd0, fd1, 0D4014000000000000;
|
||||
; CHECK-NEXT: ret;
|
||||
%z = fmul double %x, 5.0
|
||||
ret double %z
|
||||
}
|
||||
|
@ -1,5 +1,8 @@
|
||||
; RUN: llc < %s -march=ptx -ptx-version=2.0 | grep ".version 2.0"
|
||||
; RUN: llc < %s -march=ptx -ptx-target=sm_20 | grep ".target sm_20"
|
||||
; RUN: llc < %s -march=ptx -mattr=ptx14 | grep ".version 1.4"
|
||||
; RUN: llc < %s -march=ptx -mattr=ptx20 | grep ".version 2.0"
|
||||
; RUN: llc < %s -march=ptx -mattr=ptx21 | grep ".version 2.1"
|
||||
; RUN: llc < %s -march=ptx -mattr=sm20 | grep ".target sm_20"
|
||||
; RUN: llc < %s -march=ptx -mattr=sm13 | grep ".target sm_13"
|
||||
|
||||
define ptx_device void @t1() {
|
||||
ret void
|
||||
|
@ -1,71 +1,382 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
;CHECK: .extern .global .s32 array[];
|
||||
@array = external global [10 x i32]
|
||||
;CHECK: .extern .global .u16 array_i16[];
|
||||
@array_i16 = external global [10 x i16]
|
||||
|
||||
;CHECK: .extern .const .s32 array_constant[];
|
||||
@array_constant = external addrspace(1) constant [10 x i32]
|
||||
;CHECK: .extern .const .u16 array_constant_i16[];
|
||||
@array_constant_i16 = external addrspace(1) constant [10 x i16]
|
||||
|
||||
;CHECK: .extern .local .s32 array_local[];
|
||||
@array_local = external addrspace(2) global [10 x i32]
|
||||
;CHECK: .extern .local .u16 array_local_i16[];
|
||||
@array_local_i16 = external addrspace(2) global [10 x i16]
|
||||
|
||||
;CHECK: .extern .shared .s32 array_shared[];
|
||||
@array_shared = external addrspace(4) global [10 x i32]
|
||||
;CHECK: .extern .shared .u16 array_shared_i16[];
|
||||
@array_shared_i16 = external addrspace(4) global [10 x i16]
|
||||
|
||||
define ptx_device void @t1(i32* %p, i32 %x) {
|
||||
; External array declarations: one array per element type (i32, i64, float,
; double) and per address space.
; The CHECK lines expect the default address space to be printed as .global,
; addrspace(1) as .const, addrspace(2) as .local and addrspace(4) as .shared,
; each as an unsized .extern array of the matching .u32/.u64/.f32/.f64 type.

;CHECK: .extern .global .u32 array_i32[];
@array_i32 = external global [10 x i32]

;CHECK: .extern .const .u32 array_constant_i32[];
@array_constant_i32 = external addrspace(1) constant [10 x i32]

;CHECK: .extern .local .u32 array_local_i32[];
@array_local_i32 = external addrspace(2) global [10 x i32]

;CHECK: .extern .shared .u32 array_shared_i32[];
@array_shared_i32 = external addrspace(4) global [10 x i32]

;CHECK: .extern .global .u64 array_i64[];
@array_i64 = external global [10 x i64]

;CHECK: .extern .const .u64 array_constant_i64[];
@array_constant_i64 = external addrspace(1) constant [10 x i64]

;CHECK: .extern .local .u64 array_local_i64[];
@array_local_i64 = external addrspace(2) global [10 x i64]

;CHECK: .extern .shared .u64 array_shared_i64[];
@array_shared_i64 = external addrspace(4) global [10 x i64]

;CHECK: .extern .global .f32 array_float[];
@array_float = external global [10 x float]

;CHECK: .extern .const .f32 array_constant_float[];
@array_constant_float = external addrspace(1) constant [10 x float]

;CHECK: .extern .local .f32 array_local_float[];
@array_local_float = external addrspace(2) global [10 x float]

;CHECK: .extern .shared .f32 array_shared_float[];
@array_shared_float = external addrspace(4) global [10 x float]

;CHECK: .extern .global .f64 array_double[];
@array_double = external global [10 x double]

;CHECK: .extern .const .f64 array_constant_double[];
@array_constant_double = external addrspace(1) constant [10 x double]

;CHECK: .extern .local .f64 array_local_double[];
@array_local_double = external addrspace(2) global [10 x double]

;CHECK: .extern .shared .f64 array_shared_double[];
@array_shared_double = external addrspace(4) global [10 x double]
|
||||
|
||||
|
||||
; Stores the i16 argument %x through %p with no offset.
; Expects a single st.global.u16 to [r1] from register rh1, immediately
; followed by ret.
define ptx_device void @t1_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [r1], rh1;
;CHECK-NEXT: ret;
  store i16 %x, i16* %p
  ret void
}
|
||||
|
||||
; Stores the i32 argument %x through %p with no offset.
; Expects a single st.global.u32 to [r1] from register r2, immediately
; followed by ret.
define ptx_device void @t1_u32(i32* %p, i32 %x) {
entry:
;CHECK: st.global.u32 [r1], r2;
;CHECK-NEXT: ret;
  store i32 %x, i32* %p
  ret void
}
|
||||
|
||||
; Stores the i64 argument %x through %p with no offset.
; Expects a single st.global.u64 to [r1] from register rd1, immediately
; followed by ret.
define ptx_device void @t1_u64(i64* %p, i64 %x) {
entry:
;CHECK: st.global.u64 [r1], rd1;
;CHECK-NEXT: ret;
  store i64 %x, i64* %p
  ret void
}
|
||||
|
||||
; Stores the float argument %x through %p with no offset.
; Expects a single st.global.f32 to [r1] from register f1, immediately
; followed by ret.
define ptx_device void @t1_f32(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1], f1;
;CHECK-NEXT: ret;
  store float %x, float* %p
  ret void
}
|
||||
|
||||
; Stores the double argument %x through %p with no offset.
; Expects a single st.global.f64 to [r1] from register fd1, immediately
; followed by ret.
define ptx_device void @t1_f64(double* %p, double %x) {
entry:
;CHECK: st.global.f64 [r1], fd1;
;CHECK-NEXT: ret;
  store double %x, double* %p
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the i16 array at %p.
; Expects the constant index to be folded into the address as [r1+2]
; in a single st.global.u16, immediately followed by ret.
define ptx_device void @t2_u16(i16* %p, i16 %x) {
entry:
;CHECK: st.global.u16 [r1+2], rh1;
;CHECK-NEXT: ret;
  %i = getelementptr i16* %p, i32 1
  store i16 %x, i16* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the i32 array at %p.
; Expects the constant index to be folded into the address as [r1+4]
; in a single st.global.u32, immediately followed by ret.
define ptx_device void @t2_u32(i32* %p, i32 %x) {
entry:
;CHECK: st.global.u32 [r1+4], r2;
;CHECK-NEXT: ret;
  %i = getelementptr i32* %p, i32 1
  store i32 %x, i32* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the i64 array at %p.
; Expects the constant index to be folded into the address as [r1+8]
; in a single st.global.u64, immediately followed by ret.
define ptx_device void @t2_u64(i64* %p, i64 %x) {
entry:
;CHECK: st.global.u64 [r1+8], rd1;
;CHECK-NEXT: ret;
  %i = getelementptr i64* %p, i32 1
  store i64 %x, i64* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the float array at %p.
; Expects the constant index to be folded into the address as [r1+4]
; in a single st.global.f32, immediately followed by ret.
define ptx_device void @t2_f32(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1+4], f1;
;CHECK-NEXT: ret;
  %i = getelementptr float* %p, i32 1
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the double array at %p.
; Expects the constant index to be folded into the address as [r1+8]
; in a single st.global.f64, immediately followed by ret.
define ptx_device void @t2_f64(double* %p, double %x) {
entry:
;CHECK: st.global.f64 [r1+8], fd1;
;CHECK-NEXT: ret;
  %i = getelementptr double* %p, i32 1
  store double %x, double* %i
  ret void
}
|
||||
|
||||
; Stores %x to element %q (a run-time index) of the i16 array at %p.
; Expects the index register r2 to be shifted left by 1, added to the base r1
; with add.u32, and the result used as the st.global.u16 address, then ret.
define ptx_device void @t3_u16(i16* %p, i32 %q, i16 %x) {
entry:
;CHECK: shl.b32 r0, r2, 1;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.u16 [r0], rh1;
;CHECK-NEXT: ret;
  %i = getelementptr i16* %p, i32 %q
  store i16 %x, i16* %i
  ret void
}
|
||||
|
||||
; Stores %x to element %q (a run-time index) of the i32 array at %p.
; Expects the index register r2 to be shifted left by 2, added to the base r1
; with add.u32, and the result used as the st.global.u32 address, then ret.
define ptx_device void @t3_u32(i32* %p, i32 %q, i32 %x) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.u32 [r0], r3;
;CHECK-NEXT: ret;
  %i = getelementptr i32* %p, i32 %q
  store i32 %x, i32* %i
  ret void
}
|
||||
|
||||
; Stores %x to element %q (a run-time index) of the i64 array at %p.
; Expects the index register r2 to be shifted left by 3, added to the base r1
; with add.u32, and the result used as the st.global.u64 address, then ret.
define ptx_device void @t3_u64(i64* %p, i32 %q, i64 %x) {
entry:
;CHECK: shl.b32 r0, r2, 3;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.u64 [r0], rd1;
;CHECK-NEXT: ret;
  %i = getelementptr i64* %p, i32 %q
  store i64 %x, i64* %i
  ret void
}
|
||||
|
||||
; Stores %x to element %q (a run-time index) of the float array at %p.
; Expects the index register r2 to be shifted left by 2, added to the base r1
; with add.u32, and the result used as the st.global.f32 address, then ret.
define ptx_device void @t3_f32(float* %p, i32 %q, float %x) {
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.f32 [r0], f1;
;CHECK-NEXT: ret;
  %i = getelementptr float* %p, i32 %q
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element %q (a run-time index) of the double array at %p.
; Expects the index register r2 to be shifted left by 3, added to the base r1
; with add.u32, and the result used as the st.global.f64 address, then ret.
define ptx_device void @t3_f64(double* %p, i32 %q, double %x) {
entry:
;CHECK: shl.b32 r0, r2, 3;
;CHECK-NEXT: add.u32 r0, r1, r0;
;CHECK-NEXT: st.global.f64 [r0], fd1;
;CHECK-NEXT: ret;
  %i = getelementptr double* %p, i32 %q
  store double %x, double* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the default-address-space array @array_i16.
; Expects the symbol itself as the address in a single st.global.u16,
; immediately followed by ret.
define ptx_device void @t4_global_u16(i16 %x) {
entry:
;CHECK: st.global.u16 [array_i16], rh1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16]* @array_i16, i16 0, i16 0
  store i16 %x, i16* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the default-address-space array @array_i32.
; Expects the symbol itself as the address in a single st.global.u32,
; immediately followed by ret.
define ptx_device void @t4_global_u32(i32 %x) {
entry:
;CHECK: st.global.u32 [array_i32], r1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 0
  store i32 %x, i32* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the default-address-space array @array_i64.
; Expects the symbol itself as the address in a single st.global.u64,
; immediately followed by ret.
define ptx_device void @t4_global_u64(i64 %x) {
entry:
;CHECK: st.global.u64 [array_i64], rd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 0
  store i64 %x, i64* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the default-address-space array @array_float.
; Expects the symbol itself as the address in a single st.global.f32,
; immediately followed by ret.
define ptx_device void @t4_global_f32(float %x) {
entry:
;CHECK: st.global.f32 [array_float], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float]* @array_float, i32 0, i32 0
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the default-address-space array @array_double.
; Expects the symbol itself as the address in a single st.global.f64,
; immediately followed by ret.
define ptx_device void @t4_global_f64(double %x) {
entry:
;CHECK: st.global.f64 [array_double], fd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double]* @array_double, i32 0, i32 0
  store double %x, double* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(2) array @array_local_i16.
; Expects the store to be emitted as st.local.u16 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_local_u16(i16 %x) {
entry:
;CHECK: st.local.u16 [array_local_i16], rh1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16] addrspace(2)* @array_local_i16, i32 0, i32 0
  store i16 %x, i16 addrspace(2)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(2) array @array_local_i32.
; Expects the store to be emitted as st.local.u32 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_local_u32(i32 %x) {
entry:
;CHECK: st.local.u32 [array_local_i32], r1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32] addrspace(2)* @array_local_i32, i32 0, i32 0
  store i32 %x, i32 addrspace(2)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(2) array @array_local_i64.
; Expects the store to be emitted as st.local.u64 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_local_u64(i64 %x) {
entry:
;CHECK: st.local.u64 [array_local_i64], rd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64] addrspace(2)* @array_local_i64, i32 0, i32 0
  store i64 %x, i64 addrspace(2)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(2) array @array_local_float.
; Expects the store to be emitted as st.local.f32 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_local_f32(float %x) {
entry:
;CHECK: st.local.f32 [array_local_float], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(2)* @array_local_float, i32 0, i32 0
  store float %x, float addrspace(2)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(2) array @array_local_double.
; Expects the store to be emitted as st.local.f64 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_local_f64(double %x) {
entry:
;CHECK: st.local.f64 [array_local_double], fd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double] addrspace(2)* @array_local_double, i32 0, i32 0
  store double %x, double addrspace(2)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(4) array @array_shared_i16.
; Expects the store to be emitted as st.shared.u16 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_shared_u16(i16 %x) {
entry:
;CHECK: st.shared.u16 [array_shared_i16], rh1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16] addrspace(4)* @array_shared_i16, i32 0, i32 0
  store i16 %x, i16 addrspace(4)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(4) array @array_shared_i32.
; Expects the store to be emitted as st.shared.u32 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_shared_u32(i32 %x) {
entry:
;CHECK: st.shared.u32 [array_shared_i32], r1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32] addrspace(4)* @array_shared_i32, i32 0, i32 0
  store i32 %x, i32 addrspace(4)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(4) array @array_shared_i64.
; Expects the store to be emitted as st.shared.u64 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_shared_u64(i64 %x) {
entry:
;CHECK: st.shared.u64 [array_shared_i64], rd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64] addrspace(4)* @array_shared_i64, i32 0, i32 0
  store i64 %x, i64 addrspace(4)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(4) array @array_shared_float.
; Expects the store to be emitted as st.shared.f32 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_shared_f32(float %x) {
entry:
;CHECK: st.shared.f32 [array_shared_float], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(4)* @array_shared_float, i32 0, i32 0
  store float %x, float addrspace(4)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(4) array @array_shared_double.
; Expects the store to be emitted as st.shared.f64 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_shared_f64(double %x) {
entry:
;CHECK: st.shared.f64 [array_shared_double], fd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double] addrspace(4)* @array_shared_double, i32 0, i32 0
  store double %x, double addrspace(4)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the default-address-space array @array_i16.
; Expects the constant index to be folded into a symbol+offset address,
; [array_i16+2], in a single st.global.u16, immediately followed by ret.
define ptx_device void @t5_u16(i16 %x) {
entry:
;CHECK: st.global.u16 [array_i16+2], rh1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i16]* @array_i16, i32 0, i32 1
  store i16 %x, i16* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the default-address-space array @array_i32.
; Expects the constant index to be folded into a symbol+offset address,
; [array_i32+4], in a single st.global.u32, immediately followed by ret.
define ptx_device void @t5_u32(i32 %x) {
entry:
;CHECK: st.global.u32 [array_i32+4], r1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i32]* @array_i32, i32 0, i32 1
  store i32 %x, i32* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the default-address-space array @array_i64.
; Expects the constant index to be folded into a symbol+offset address,
; [array_i64+8], in a single st.global.u64, immediately followed by ret.
define ptx_device void @t5_u64(i64 %x) {
entry:
;CHECK: st.global.u64 [array_i64+8], rd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x i64]* @array_i64, i32 0, i32 1
  store i64 %x, i64* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the default-address-space array @array_float.
; Expects the constant index to be folded into a symbol+offset address,
; [array_float+4], in a single st.global.f32, immediately followed by ret.
define ptx_device void @t5_f32(float %x) {
entry:
;CHECK: st.global.f32 [array_float+4], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float]* @array_float, i32 0, i32 1
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the default-address-space array @array_double.
; Expects the constant index to be folded into a symbol+offset address,
; [array_double+8], in a single st.global.f64, immediately followed by ret.
define ptx_device void @t5_f64(double %x) {
entry:
;CHECK: st.global.f64 [array_double+8], fd1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x double]* @array_double, i32 0, i32 1
  store double %x, double* %i
  ret void
}
|
||||
|
@ -1,78 +0,0 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
; External float arrays, one per address space.
; The CHECK lines expect the default address space to be printed as .global,
; addrspace(1) as .const, addrspace(2) as .local and addrspace(4) as .shared,
; each as an unsized .extern .f32 array.

;CHECK: .extern .global .f32 array[];
@array = external global [10 x float]

;CHECK: .extern .const .f32 array_constant[];
@array_constant = external addrspace(1) constant [10 x float]

;CHECK: .extern .local .f32 array_local[];
@array_local = external addrspace(2) global [10 x float]

;CHECK: .extern .shared .f32 array_shared[];
@array_shared = external addrspace(4) global [10 x float]
|
||||
|
||||
; Stores the float argument %x through %p with no offset.
; Expects a single st.global.f32 to [r1] from register f1, immediately
; followed by ret.
define ptx_device void @t1(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1], f1;
;CHECK-NEXT: ret;
  store float %x, float* %p
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the float array at %p.
; Expects the constant index to be folded into the address as [r1+4]
; in a single st.global.f32, immediately followed by ret.
define ptx_device void @t2(float* %p, float %x) {
entry:
;CHECK: st.global.f32 [r1+4], f1;
;CHECK-NEXT: ret;
  %i = getelementptr float* %p, i32 1
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element %q (a run-time index) of the float array at %p.
; Expects a .reg .s32 declaration for r0, then the index register r2 shifted
; left by 2, added to the base r1 with add.s32, and the result used as the
; st.global.f32 address, then ret.
define ptx_device void @t3(float* %p, i32 %q, float %x) {
;CHECK: .reg .s32 r0;
entry:
;CHECK: shl.b32 r0, r2, 2;
;CHECK-NEXT: add.s32 r0, r1, r0;
;CHECK-NEXT: st.global.f32 [r0], f1;
;CHECK-NEXT: ret;
  %i = getelementptr float* %p, i32 %q
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the default-address-space array @array.
; Expects the symbol itself as the address in a single st.global.f32,
; immediately followed by ret.
define ptx_device void @t4_global(float %x) {
entry:
;CHECK: st.global.f32 [array], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float]* @array, i32 0, i32 0
  store float %x, float* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(2) array @array_local.
; Expects the store to be emitted as st.local.f32 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_local(float %x) {
entry:
;CHECK: st.local.f32 [array_local], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(2)* @array_local, i32 0, i32 0
  store float %x, float addrspace(2)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 0 of the addrspace(4) array @array_shared.
; Expects the store to be emitted as st.shared.f32 addressed by the symbol,
; immediately followed by ret.
define ptx_device void @t4_shared(float %x) {
entry:
;CHECK: st.shared.f32 [array_shared], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float] addrspace(4)* @array_shared, i32 0, i32 0
  store float %x, float addrspace(4)* %i
  ret void
}
|
||||
|
||||
; Stores %x to element 1 of the default-address-space array @array.
; Expects the constant index to be folded into a symbol+offset address,
; [array+4], in a single st.global.f32, immediately followed by ret.
define ptx_device void @t5(float %x) {
entry:
;CHECK: st.global.f32 [array+4], f1;
;CHECK-NEXT: ret;
  %i = getelementptr [10 x float]* @array, i32 0, i32 1
  store float %x, float* %i
  ret void
}
|
@ -1,29 +1,71 @@
|
||||
; RUN: llc < %s -march=ptx | FileCheck %s
|
||||
|
||||
; Subtracts two i16 arguments and returns the result.
; Expects a single sub.u16 on the 16-bit registers (rh0 = rh1 - rh2),
; immediately followed by ret.
define ptx_device i16 @t1_u16(i16 %x, i16 %y) {
; CHECK: sub.u16 rh0, rh1, rh2;
; CHECK-NEXT: ret;
  %z = sub i16 %x, %y
  ret i16 %z
}
|
||||
|
||||
; Subtracts two i32 arguments and returns the result.
; Expects a single sub.u32 (r0 = r1 - r2), immediately followed by ret.
define ptx_device i32 @t1_u32(i32 %x, i32 %y) {
; CHECK: sub.u32 r0, r1, r2;
; CHECK-NEXT: ret;
  %z = sub i32 %x, %y
  ret i32 %z
}
|
||||
|
||||
; Subtracts two i64 arguments and returns the result.
; Expects a single sub.u64 on the 64-bit registers (rd0 = rd1 - rd2),
; immediately followed by ret.
define ptx_device i64 @t1_u64(i64 %x, i64 %y) {
; CHECK: sub.u64 rd0, rd1, rd2;
; CHECK-NEXT: ret;
  %z = sub i64 %x, %y
  ret i64 %z
}
|
||||
|
||||
; Subtracts two float arguments and returns the result.
; Expects a single sub.f32 (f0 = f1 - f2), immediately followed by ret.
; The check includes the terminating ';' like every other instruction check
; in this test.
define ptx_device float @t1_f32(float %x, float %y) {
; CHECK: sub.f32 f0, f1, f2;
; CHECK-NEXT: ret;
  %z = fsub float %x, %y
  ret float %z
}
|
||||
|
||||
; Subtracts two double arguments and returns the result.
; Expects a single sub.f64 (fd0 = fd1 - fd2), immediately followed by ret.
; The check includes the terminating ';' like every other instruction check
; in this test.
define ptx_device double @t1_f64(double %x, double %y) {
; CHECK: sub.f64 fd0, fd1, fd2;
; CHECK-NEXT: ret;
  %z = fsub double %x, %y
  ret double %z
}
|
||||
|
||||
; Subtracts the constant 1 from an i16 argument.
; Expects the subtraction to be emitted as an add.u16 of the immediate -1,
; immediately followed by ret.
define ptx_device i16 @t2_u16(i16 %x) {
; CHECK: add.u16 rh0, rh1, -1;
; CHECK-NEXT: ret;
  %z = sub i16 %x, 1
  ret i16 %z
}
|
||||
|
||||
; Subtracts the constant 1 from an i32 argument.
; Expects the subtraction to be emitted as an add.u32 of the immediate -1,
; immediately followed by ret.
define ptx_device i32 @t2_u32(i32 %x) {
; CHECK: add.u32 r0, r1, -1;
; CHECK-NEXT: ret;
  %z = sub i32 %x, 1
  ret i32 %z
}
|
||||
|
||||
; Subtracts the constant 1 from an i64 argument.
; Expects the subtraction to be emitted as an add.u64 of the immediate -1,
; immediately followed by ret.
define ptx_device i64 @t2_u64(i64 %x) {
; CHECK: add.u64 rd0, rd1, -1;
; CHECK-NEXT: ret;
  %z = sub i64 %x, 1
  ret i64 %z
}
|
||||
|
||||
; Subtracts the constant 1.0 from a float argument.
; Expects the subtraction to be emitted as an add.f32 with the immediate
; 0FBF800000 (the single-precision bit pattern of -1.0), immediately
; followed by ret.
define ptx_device float @t2_f32(float %x) {
; CHECK: add.f32 f0, f1, 0FBF800000;
; CHECK-NEXT: ret;
  %z = fsub float %x, 1.0
  ret float %z
}
|
||||
|
||||
; Subtracts the constant 1.0 from a double argument.
; Expects the subtraction to be emitted as an add.f64 with the immediate
; 0DBFF0000000000000 (the double-precision bit pattern of -1.0), immediately
; followed by ret.
define ptx_device double @t2_f64(double %x) {
; CHECK: add.f64 fd0, fd1, 0DBFF0000000000000;
; CHECK-NEXT: ret;
  %z = fsub double %x, 1.0
  ret double %z
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user