Mirror of https://github.com/RPCS3/llvm.git (synced 2025-01-17 23:44:43 +00:00)
[NVPTX] Run clang-format on all NVPTX sources.

Hopefully this resolves any outstanding style issues and gives us an automated way of ensuring we conform to the style guidelines.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@178415 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent a9f83517fc
commit 3639ce2575
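A minimal sketch of how a tree-wide reformat like this is typically reproduced, assuming clang-format is installed and the backend sources live under lib/Target/NVPTX; the exact invocation used for this commit is not recorded in the message:

    # Hypothetical reproduction: rewrite all NVPTX backend sources in place
    # using the project's LLVM style.
    clang-format -i -style=LLVM lib/Target/NVPTX/*.cpp lib/Target/NVPTX/*.h

Rerunning the same command and checking that it produces an empty diff gives the kind of automated style check the commit message describes.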
@@ -52,25 +52,24 @@ enum PropertyAnnotation {
};

const unsigned AnnotationNameLen = 8; // length of each annotation name
const char
PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"maxntidx", // PROPERTY_MAXNTID_X
"maxntidy", // PROPERTY_MAXNTID_Y
"maxntidz", // PROPERTY_MAXNTID_Z
"reqntidx", // PROPERTY_REQNTID_X
"reqntidy", // PROPERTY_REQNTID_Y
"reqntidz", // PROPERTY_REQNTID_Z
"minctasm", // PROPERTY_MINNCTAPERSM
"texture", // PROPERTY_ISTEXTURE
"surface", // PROPERTY_ISSURFACE
"sampler", // PROPERTY_ISSAMPLER
"rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
"wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
"kernel", // PROPERTY_ISKERNEL_FUNCTION
"align", // PROPERTY_ALIGN
const char PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
"maxntidx", // PROPERTY_MAXNTID_X
"maxntidy", // PROPERTY_MAXNTID_Y
"maxntidz", // PROPERTY_MAXNTID_Z
"reqntidx", // PROPERTY_REQNTID_X
"reqntidy", // PROPERTY_REQNTID_Y
"reqntidz", // PROPERTY_REQNTID_Z
"minctasm", // PROPERTY_MINNCTAPERSM
"texture", // PROPERTY_ISTEXTURE
"surface", // PROPERTY_ISSURFACE
"sampler", // PROPERTY_ISSAMPLER
"rdoimage", // PROPERTY_ISREADONLY_IMAGE_PARAM
"wroimage", // PROPERTY_ISWRITEONLY_IMAGE_PARAM
"kernel", // PROPERTY_ISKERNEL_FUNCTION
"align", // PROPERTY_ALIGN

// last property
"proplast", // PROPERTY_LAST
// last property
"proplast", // PROPERTY_LAST
};

// name of named metadata used for global annotations
@@ -80,9 +79,8 @@ PropertyAnnotationNames[PROPERTY_LAST + 1][AnnotationNameLen + 1] = {
// compiling those .cpp files, hence __attribute__((unused)).
__attribute__((unused))
#endif
static const char* NamedMDForAnnotations = "nvvm.annotations";
static const char *NamedMDForAnnotations = "nvvm.annotations";

}

#endif
@@ -23,10 +23,9 @@ bool CompileForDebugging;
// compile for debugging
static cl::opt<bool, true>
Debug("debug-compile", cl::desc("Compile for debugging"), cl::Hidden,
cl::location(CompileForDebugging),
cl::init(false));
cl::location(CompileForDebugging), cl::init(false));

void NVPTXMCAsmInfo::anchor() { }
void NVPTXMCAsmInfo::anchor() {}

NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Triple TheTriple(TT);
@@ -55,7 +54,7 @@ NVPTXMCAsmInfo::NVPTXMCAsmInfo(const Target &T, const StringRef &TT) {
Data32bitsDirective = " .b32 ";
Data64bitsDirective = " .b64 ";
PrivateGlobalPrefix = "";
ZeroDirective = " .b8";
ZeroDirective = " .b8";
AsciiDirective = " .b8";
AscizDirective = " .b8";
@@ -28,7 +28,6 @@
#define GET_REGINFO_MC_DESC
#include "NVPTXGenRegisterInfo.inc"

using namespace llvm;

static MCInstrInfo *createNVPTXMCInstrInfo() {
@@ -44,22 +43,20 @@ static MCRegisterInfo *createNVPTXMCRegisterInfo(StringRef TT) {
return X;
}

static MCSubtargetInfo *createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU,
StringRef FS) {
static MCSubtargetInfo *
createNVPTXMCSubtargetInfo(StringRef TT, StringRef CPU, StringRef FS) {
MCSubtargetInfo *X = new MCSubtargetInfo();
InitNVPTXMCSubtargetInfo(X, TT, CPU, FS);
return X;
}

static MCCodeGenInfo *createNVPTXMCCodeGenInfo(StringRef TT, Reloc::Model RM,
CodeModel::Model CM,
CodeGenOpt::Level OL) {
static MCCodeGenInfo *createNVPTXMCCodeGenInfo(
StringRef TT, Reloc::Model RM, CodeModel::Model CM, CodeGenOpt::Level OL) {
MCCodeGenInfo *X = new MCCodeGenInfo();
X->InitMCCodeGenInfo(RM, CM, OL);
return X;
}

// Force static initialization.
extern "C" void LLVMInitializeNVPTXTargetMC() {
// Register the MC asm info.
@@ -12,7 +12,6 @@
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_SUPPORT_MANAGED_STRING_H
#define LLVM_SUPPORT_MANAGED_STRING_H

@@ -41,18 +41,24 @@ enum CondCodes {

inline static const char *NVPTXCondCodeToString(NVPTXCC::CondCodes CC) {
switch (CC) {
case NVPTXCC::NE: return "ne";
case NVPTXCC::EQ: return "eq";
case NVPTXCC::LT: return "lt";
case NVPTXCC::LE: return "le";
case NVPTXCC::GT: return "gt";
case NVPTXCC::GE: return "ge";
case NVPTXCC::NE:
return "ne";
case NVPTXCC::EQ:
return "eq";
case NVPTXCC::LT:
return "lt";
case NVPTXCC::LE:
return "le";
case NVPTXCC::GT:
return "gt";
case NVPTXCC::GE:
return "ge";
}
llvm_unreachable("Unknown condition code");
}

FunctionPass *createNVPTXISelDag(NVPTXTargetMachine &TM,
llvm::CodeGenOpt::Level OptLevel);
FunctionPass *
createNVPTXISelDag(NVPTXTargetMachine &TM, llvm::CodeGenOpt::Level OptLevel);
FunctionPass *createLowerStructArgsPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatPass(NVPTXTargetMachine &);
FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
@@ -62,8 +68,7 @@ bool isImageOrSamplerVal(const Value *, const Module *);
extern Target TheNVPTXTarget32;
extern Target TheNVPTXTarget64;

namespace NVPTX
{
namespace NVPTX {
enum DrvInterface {
NVCL,
CUDA,
@@ -102,7 +107,7 @@ enum LoadStore {
};

namespace PTXLdStInstCode {
enum AddressSpace{
enum AddressSpace {
GENERIC = 0,
GLOBAL = 1,
CONSTANT = 2,
@@ -19,9 +19,9 @@
namespace llvm {

bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
bool functionModified = false;
Function::iterator I = function.begin();
TerminatorInst *firstTerminatorInst = (I++)->getTerminator();
bool functionModified = false;
Function::iterator I = function.begin();
TerminatorInst *firstTerminatorInst = (I++)->getTerminator();

for (Function::iterator E = function.end(); I != E; ++I) {
for (BasicBlock::iterator BI = I->begin(), BE = I->end(); BI != BE;) {
@@ -37,12 +37,10 @@ bool NVPTXAllocaHoisting::runOnFunction(Function &function) {
}

char NVPTXAllocaHoisting::ID = 1;
RegisterPass<NVPTXAllocaHoisting> X("alloca-hoisting",
"Hoisting alloca instructions in non-entry "
"blocks to the entry block");
RegisterPass<NVPTXAllocaHoisting>
X("alloca-hoisting", "Hoisting alloca instructions in non-entry "
"blocks to the entry block");

FunctionPass *createAllocaHoisting() {
return new NVPTXAllocaHoisting();
}
FunctionPass *createAllocaHoisting() { return new NVPTXAllocaHoisting(); }

} // end namespace llvm
File diff suppressed because it is too large
@@ -43,15 +43,15 @@
// This is defined in AsmPrinter.cpp.
// Used to process the constant expressions in initializers.
namespace nvptx {
const llvm::MCExpr *LowerConstant(const llvm::Constant *CV,
llvm::AsmPrinter &AP) ;
const llvm::MCExpr *
LowerConstant(const llvm::Constant *CV, llvm::AsmPrinter &AP);
}

namespace llvm {

class LineReader {
private:
unsigned theCurLine ;
unsigned theCurLine;
std::ifstream fstr;
char buff[512];
std::string theFileName;
@@ -63,17 +63,12 @@ public:
theFileName = filename;
}
std::string fileName() { return theFileName; }
~LineReader() {
fstr.close();
}
~LineReader() { fstr.close(); }
std::string readLine(unsigned line);
};

class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
|
||||
|
||||
class AggBuffer {
|
||||
// Used to buffer the emitted string for initializing global
|
||||
// aggregates.
|
||||
@ -92,7 +87,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
// Once we have this AggBuffer setup, we can choose how to print
|
||||
// it out.
|
||||
public:
|
||||
unsigned size; // size of the buffer in bytes
|
||||
unsigned size; // size of the buffer in bytes
|
||||
unsigned char *buffer; // the buffer
|
||||
unsigned numSymbols; // number of symbol addresses
|
||||
SmallVector<unsigned, 4> symbolPosInBuffer;
|
||||
@ -105,33 +100,31 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
|
||||
public:
|
||||
AggBuffer(unsigned _size, raw_ostream &_O, NVPTXAsmPrinter &_AP)
|
||||
:O(_O),AP(_AP) {
|
||||
: O(_O), AP(_AP) {
|
||||
buffer = new unsigned char[_size];
|
||||
size = _size;
|
||||
curpos = 0;
|
||||
numSymbols = 0;
|
||||
}
|
||||
~AggBuffer() {
|
||||
delete [] buffer;
|
||||
}
|
||||
~AggBuffer() { delete[] buffer; }
|
||||
unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) {
|
||||
assert((curpos+Num) <= size);
|
||||
assert((curpos+Bytes) <= size);
|
||||
for ( int i= 0; i < Num; ++i) {
|
||||
assert((curpos + Num) <= size);
|
||||
assert((curpos + Bytes) <= size);
|
||||
for (int i = 0; i < Num; ++i) {
|
||||
buffer[curpos] = Ptr[i];
|
||||
curpos ++;
|
||||
curpos++;
|
||||
}
|
||||
for ( int i=Num; i < Bytes ; ++i) {
|
||||
for (int i = Num; i < Bytes; ++i) {
|
||||
buffer[curpos] = 0;
|
||||
curpos ++;
|
||||
curpos++;
|
||||
}
|
||||
return curpos;
|
||||
}
|
||||
unsigned addZeros(int Num) {
|
||||
assert((curpos+Num) <= size);
|
||||
for ( int i= 0; i < Num; ++i) {
|
||||
assert((curpos + Num) <= size);
|
||||
for (int i = 0; i < Num; ++i) {
|
||||
buffer[curpos] = 0;
|
||||
curpos ++;
|
||||
curpos++;
|
||||
}
|
||||
return curpos;
|
||||
}
|
||||
@ -143,10 +136,10 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
void print() {
|
||||
if (numSymbols == 0) {
|
||||
// print out in bytes
|
||||
for (unsigned i=0; i<size; i++) {
|
||||
for (unsigned i = 0; i < size; i++) {
|
||||
if (i)
|
||||
O << ", ";
|
||||
O << (unsigned int)buffer[i];
|
||||
O << (unsigned int) buffer[i];
|
||||
}
|
||||
} else {
|
||||
// print out in 4-bytes or 8-bytes
|
||||
@ -156,7 +149,7 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
unsigned int nBytes = 4;
|
||||
if (AP.nvptxSubtarget.is64Bit())
|
||||
nBytes = 8;
|
||||
for (pos=0; pos<size; pos+=nBytes) {
|
||||
for (pos = 0; pos < size; pos += nBytes) {
|
||||
if (pos)
|
||||
O << ", ";
|
||||
if (pos == nextSymbolPos) {
|
||||
@ -164,22 +157,19 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
if (GlobalValue *GVar = dyn_cast<GlobalValue>(v)) {
|
||||
MCSymbol *Name = AP.Mang->getSymbol(GVar);
|
||||
O << *Name;
|
||||
}
|
||||
else if (ConstantExpr *Cexpr =
|
||||
dyn_cast<ConstantExpr>(v)) {
|
||||
} else if (ConstantExpr *Cexpr = dyn_cast<ConstantExpr>(v)) {
|
||||
O << *nvptx::LowerConstant(Cexpr, AP);
|
||||
} else
|
||||
llvm_unreachable("symbol type unknown");
|
||||
nSym++;
|
||||
if (nSym >= numSymbols)
|
||||
nextSymbolPos = size+1;
|
||||
nextSymbolPos = size + 1;
|
||||
else
|
||||
nextSymbolPos = symbolPosInBuffer[nSym];
|
||||
} else
|
||||
if (nBytes == 4)
|
||||
O << *(unsigned int*)(buffer+pos);
|
||||
else
|
||||
O << *(unsigned long long*)(buffer+pos);
|
||||
} else if (nBytes == 4)
|
||||
O << *(unsigned int *)(buffer + pos);
|
||||
else
|
||||
O << *(unsigned long long *)(buffer + pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -189,10 +179,8 @@ class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter {
|
||||
|
||||
virtual void emitSrcInText(StringRef filename, unsigned line);
|
||||
|
||||
private :
|
||||
virtual const char *getPassName() const {
|
||||
return "NVPTX Assembly Printer";
|
||||
}
|
||||
private:
|
||||
virtual const char *getPassName() const { return "NVPTX Assembly Printer"; }
|
||||
|
||||
const Function *F;
|
||||
std::string CurrentFnName;
|
||||
@ -207,31 +195,28 @@ private :
|
||||
|
||||
void printGlobalVariable(const GlobalVariable *GVar);
|
||||
void printOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
|
||||
const char *Modifier=0);
|
||||
const char *Modifier = 0);
|
||||
void printLdStCode(const MachineInstr *MI, int opNum, raw_ostream &O,
|
||||
const char *Modifier=0);
|
||||
void printVecModifiedImmediate(const MachineOperand &MO,
|
||||
const char *Modifier, raw_ostream &O);
|
||||
const char *Modifier = 0);
|
||||
void printVecModifiedImmediate(const MachineOperand &MO, const char *Modifier,
|
||||
raw_ostream &O);
|
||||
void printMemOperand(const MachineInstr *MI, int opNum, raw_ostream &O,
|
||||
const char *Modifier=0);
|
||||
const char *Modifier = 0);
|
||||
void printImplicitDef(const MachineInstr *MI, raw_ostream &O) const;
|
||||
// definition autogenerated.
|
||||
void printInstruction(const MachineInstr *MI, raw_ostream &O);
|
||||
void printModuleLevelGV(GlobalVariable* GVar, raw_ostream &O,
|
||||
bool=false);
|
||||
void printModuleLevelGV(GlobalVariable *GVar, raw_ostream &O, bool = false);
|
||||
void printParamName(int paramIndex, raw_ostream &O);
|
||||
void printParamName(Function::const_arg_iterator I, int paramIndex,
|
||||
raw_ostream &O);
|
||||
void emitHeader(Module &M, raw_ostream &O);
|
||||
void emitKernelFunctionDirectives(const Function& F,
|
||||
raw_ostream &O) const;
|
||||
void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const;
|
||||
void emitVirtualRegister(unsigned int vr, bool isVec, raw_ostream &O);
|
||||
void emitFunctionExternParamList(const MachineFunction &MF);
|
||||
void emitFunctionParamList(const Function *, raw_ostream &O);
|
||||
void emitFunctionParamList(const MachineFunction &MF, raw_ostream &O);
|
||||
void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF);
|
||||
void emitFunctionTempData(const MachineFunction &MF,
|
||||
unsigned &FrameSize);
|
||||
void emitFunctionTempData(const MachineFunction &MF, unsigned &FrameSize);
|
||||
bool isImageType(const Type *Ty);
|
||||
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
|
||||
unsigned AsmVariant, const char *ExtraCode,
|
||||
@ -269,17 +254,16 @@ private:
|
||||
void recordAndEmitFilenames(Module &);
|
||||
|
||||
void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O);
|
||||
void emitPTXAddressSpace(unsigned int AddressSpace,
|
||||
raw_ostream &O) const;
|
||||
std::string getPTXFundamentalTypeStr(const Type *Ty, bool=true) const ;
|
||||
void printScalarConstant(Constant *CPV, raw_ostream &O) ;
|
||||
void printFPConstant(const ConstantFP *Fp, raw_ostream &O) ;
|
||||
void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer) ;
|
||||
void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer) ;
|
||||
void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const;
|
||||
std::string getPTXFundamentalTypeStr(const Type *Ty, bool = true) const;
|
||||
void printScalarConstant(Constant *CPV, raw_ostream &O);
|
||||
void printFPConstant(const ConstantFP *Fp, raw_ostream &O);
|
||||
void bufferLEByte(Constant *CPV, int Bytes, AggBuffer *aggBuffer);
|
||||
void bufferAggregateConstant(Constant *CV, AggBuffer *aggBuffer);
|
||||
|
||||
void printOperandProper(const MachineOperand &MO);
|
||||
|
||||
void emitLinkageDirective(const GlobalValue* V, raw_ostream &O);
|
||||
void emitLinkageDirective(const GlobalValue *V, raw_ostream &O);
|
||||
void emitDeclarations(Module &, raw_ostream &O);
|
||||
void emitDeclaration(const Function *, raw_ostream &O);
|
||||
|
||||
@ -289,10 +273,9 @@ private:
|
||||
LineReader *reader;
|
||||
LineReader *getReader(std::string);
|
||||
public:
|
||||
NVPTXAsmPrinter(TargetMachine &TM,
|
||||
MCStreamer &Streamer)
|
||||
: AsmPrinter(TM, Streamer),
|
||||
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
|
||||
NVPTXAsmPrinter(TargetMachine &TM, MCStreamer &Streamer)
|
||||
: AsmPrinter(TM, Streamer),
|
||||
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
|
||||
CurrentBankselLabelInBasicBlock = "";
|
||||
VRidGlobal2LocalMap = NULL;
|
||||
reader = NULL;
|
||||
|
@ -25,9 +25,7 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const {
|
||||
return true;
|
||||
}
|
||||
bool NVPTXFrameLowering::hasFP(const MachineFunction &MF) const { return true; }
|
||||
|
||||
void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
if (MF.getFrameInfo()->hasStackObjects()) {
|
||||
@ -42,46 +40,39 @@ void NVPTXFrameLowering::emitPrologue(MachineFunction &MF) const {
|
||||
// mov %SPL, %depot;
|
||||
// cvta.local %SP, %SPL;
|
||||
if (is64bit) {
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, dl,
|
||||
tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
|
||||
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
|
||||
BuildMI(MBB, MI, dl,
|
||||
tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrameLocal)
|
||||
.addReg(NVPTX::VRDepot);
|
||||
MachineInstr *MI = BuildMI(
|
||||
MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes_64),
|
||||
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
|
||||
BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
|
||||
NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
|
||||
} else {
|
||||
MachineInstr *MI = BuildMI(MBB, MBBI, dl,
|
||||
tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
|
||||
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
|
||||
BuildMI(MBB, MI, dl,
|
||||
tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrameLocal)
|
||||
.addReg(NVPTX::VRDepot);
|
||||
MachineInstr *MI = BuildMI(
|
||||
MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::cvta_local_yes),
|
||||
NVPTX::VRFrame).addReg(NVPTX::VRFrameLocal);
|
||||
BuildMI(MBB, MI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
|
||||
NVPTX::VRFrameLocal).addReg(NVPTX::VRDepot);
|
||||
}
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
// mov %SP, %depot;
|
||||
if (is64bit)
|
||||
BuildMI(MBB, MBBI, dl,
|
||||
tm.getInstrInfo()->get(NVPTX::IMOV64rr), NVPTX::VRFrame)
|
||||
.addReg(NVPTX::VRDepot);
|
||||
BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV64rr),
|
||||
NVPTX::VRFrame).addReg(NVPTX::VRDepot);
|
||||
else
|
||||
BuildMI(MBB, MBBI, dl,
|
||||
tm.getInstrInfo()->get(NVPTX::IMOV32rr), NVPTX::VRFrame)
|
||||
.addReg(NVPTX::VRDepot);
|
||||
BuildMI(MBB, MBBI, dl, tm.getInstrInfo()->get(NVPTX::IMOV32rr),
|
||||
NVPTX::VRFrame).addReg(NVPTX::VRDepot);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void NVPTXFrameLowering::emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const {
|
||||
}
|
||||
MachineBasicBlock &MBB) const {}
|
||||
|
||||
// This function eliminates ADJCALLSTACKDOWN,
|
||||
// ADJCALLSTACKUP pseudo instructions
|
||||
void NVPTXFrameLowering::
|
||||
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const {
|
||||
void NVPTXFrameLowering::eliminateCallFramePseudoInstr(
|
||||
MachineFunction &MF, MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I) const {
|
||||
// Simply discard ADJCALLSTACKDOWN,
|
||||
// ADJCALLSTACKUP instructions.
|
||||
MBB.erase(I);
|
||||
}
|
||||
|
||||
|
@ -16,7 +16,6 @@
|
||||
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
|
||||
|
||||
namespace llvm {
|
||||
class NVPTXTargetMachine;
|
||||
|
||||
@ -26,13 +25,12 @@ class NVPTXFrameLowering : public TargetFrameLowering {
|
||||
|
||||
public:
|
||||
explicit NVPTXFrameLowering(NVPTXTargetMachine &_tm, bool _is64bit)
|
||||
: TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0),
|
||||
tm(_tm), is64bit(_is64bit) {}
|
||||
: TargetFrameLowering(TargetFrameLowering::StackGrowsUp, 8, 0), tm(_tm),
|
||||
is64bit(_is64bit) {}
|
||||
|
||||
virtual bool hasFP(const MachineFunction &MF) const;
|
||||
virtual void emitPrologue(MachineFunction &MF) const;
|
||||
virtual void emitEpilogue(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB) const;
|
||||
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
|
||||
|
||||
void eliminateCallFramePseudoInstr(MachineFunction &MF,
|
||||
MachineBasicBlock &MBB,
|
||||
|
File diff suppressed because it is too large
@ -64,11 +64,10 @@ public:
|
||||
|
||||
const NVPTXSubtarget &Subtarget;
|
||||
|
||||
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
|
||||
char ConstraintCode,
|
||||
std::vector<SDValue> &OutOps);
|
||||
virtual bool SelectInlineAsmMemoryOperand(
|
||||
const SDValue &Op, char ConstraintCode, std::vector<SDValue> &OutOps);
|
||||
private:
|
||||
// Include the pieces autogenerated from the target description.
|
||||
// Include the pieces autogenerated from the target description.
|
||||
#include "NVPTXGenDAGISel.inc"
|
||||
|
||||
SDNode *Select(SDNode *N);
|
||||
@ -99,7 +98,6 @@ private:
|
||||
bool SelectADDRsi64(SDNode *OpNode, SDValue Addr, SDValue &Base,
|
||||
SDValue &Offset);
|
||||
|
||||
|
||||
bool ChkMemSDNodeAddressSpace(SDNode *N, unsigned int spN) const;
|
||||
|
||||
bool UndefOrImm(SDValue Op, SDValue N, SDValue &Retval);
|
||||
|
File diff suppressed because it is too large
@ -87,7 +87,7 @@ public:
|
||||
|
||||
bool isTypeSupportedInIntrinsic(MVT VT) const;
|
||||
|
||||
bool getTgtMemIntrinsic(IntrinsicInfo& Info, const CallInst &I,
|
||||
bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I,
|
||||
unsigned Intrinsic) const;
|
||||
|
||||
/// isLegalAddressingMode - Return true if the addressing mode represented
|
||||
@ -107,14 +107,13 @@ public:
|
||||
}
|
||||
|
||||
ConstraintType getConstraintType(const std::string &Constraint) const;
|
||||
std::pair<unsigned, const TargetRegisterClass*>
|
||||
std::pair<unsigned, const TargetRegisterClass *>
|
||||
getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl,
|
||||
SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
virtual SDValue LowerFormalArguments(
|
||||
SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins, DebugLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
|
||||
virtual SDValue
|
||||
LowerCall(CallLoweringInfo &CLI, SmallVectorImpl<SDValue> &InVals) const;
|
||||
@ -136,17 +135,15 @@ public:
|
||||
NVPTXTargetMachine *nvTM;
|
||||
|
||||
// PTX always uses 32-bit shift amounts
|
||||
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const {
|
||||
return MVT::i32;
|
||||
}
|
||||
virtual MVT getScalarShiftAmountTy(EVT LHSTy) const { return MVT::i32; }
|
||||
|
||||
virtual bool shouldSplitVectorElementType(EVT VT) const;
|
||||
|
||||
private:
|
||||
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
|
||||
const NVPTXSubtarget &nvptxSubtarget; // cache the subtarget here
|
||||
|
||||
SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx, EVT =
|
||||
MVT::i32) const;
|
||||
SDValue getExtSymb(SelectionDAG &DAG, const char *name, int idx,
|
||||
EVT = MVT::i32) const;
|
||||
SDValue getParamSymbol(SelectionDAG &DAG, int idx, EVT = MVT::i32) const;
|
||||
SDValue getParamHelpSymbol(SelectionDAG &DAG, int idx);
|
||||
|
||||
@ -159,8 +156,7 @@ private:
|
||||
SDValue LowerSTOREi1(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue LowerSTOREVector(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
||||
virtual void ReplaceNodeResults(SDNode *N,
|
||||
SmallVectorImpl<SDValue> &Results,
|
||||
virtual void ReplaceNodeResults(SDNode *N, SmallVectorImpl<SDValue> &Results,
|
||||
SelectionDAG &DAG) const;
|
||||
};
|
||||
} // namespace llvm
|
||||
|
@ -23,61 +23,55 @@
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include <cstdio>
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
// FIXME: Add the subtarget support on this constructor.
|
||||
NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
|
||||
: NVPTXGenInstrInfo(),
|
||||
TM(tm),
|
||||
RegInfo(*this, *TM.getSubtargetImpl()) {}
|
||||
: NVPTXGenInstrInfo(), TM(tm), RegInfo(*this, *TM.getSubtargetImpl()) {}
|
||||
|
||||
|
||||
void NVPTXInstrInfo::copyPhysReg (MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const {
|
||||
void NVPTXInstrInfo::copyPhysReg(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
|
||||
if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int32RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int8RegsRegClass.contains(SrcReg))
|
||||
NVPTX::Int8RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int1RegsRegClass.contains(SrcReg))
|
||||
NVPTX::Int1RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Float32RegsRegClass.contains(SrcReg))
|
||||
NVPTX::Float32RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int16RegsRegClass.contains(SrcReg))
|
||||
NVPTX::Int16RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int64RegsRegClass.contains(SrcReg))
|
||||
NVPTX::Int64RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Float64RegsRegClass.contains(SrcReg))
|
||||
NVPTX::Float64RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else {
|
||||
llvm_unreachable("Don't know how to copy a register");
|
||||
}
|
||||
}
|
||||
|
||||
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg,
|
||||
bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DestReg) const {
|
||||
// Look for the appropriate part of TSFlags
|
||||
bool isMove = false;
|
||||
|
||||
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >>
|
||||
NVPTX::SimpleMoveShift;
|
||||
unsigned TSFlags =
|
||||
(MI.getDesc().TSFlags & NVPTX::SimpleMoveMask) >> NVPTX::SimpleMoveShift;
|
||||
isMove = (TSFlags == 1);
|
||||
|
||||
if (isMove) {
|
||||
@ -94,10 +88,10 @@ bool NVPTXInstrInfo::isMoveInstr(const MachineInstr &MI,
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
|
||||
{
|
||||
bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const {
|
||||
switch (MI.getOpcode()) {
|
||||
default: return false;
|
||||
default:
|
||||
return false;
|
||||
case NVPTX::INT_PTX_SREG_NTID_X:
|
||||
case NVPTX::INT_PTX_SREG_NTID_Y:
|
||||
case NVPTX::INT_PTX_SREG_NTID_Z:
|
||||
@ -115,12 +109,11 @@ bool NVPTXInstrInfo::isReadSpecialReg(MachineInstr &MI) const
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
|
||||
unsigned &AddrSpace) const {
|
||||
bool isLoad = false;
|
||||
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isLoadMask) >>
|
||||
NVPTX::isLoadShift;
|
||||
unsigned TSFlags =
|
||||
(MI.getDesc().TSFlags & NVPTX::isLoadMask) >> NVPTX::isLoadShift;
|
||||
isLoad = (TSFlags == 1);
|
||||
if (isLoad)
|
||||
AddrSpace = getLdStCodeAddrSpace(MI);
|
||||
@ -130,15 +123,14 @@ bool NVPTXInstrInfo::isLoadInstr(const MachineInstr &MI,
|
||||
bool NVPTXInstrInfo::isStoreInstr(const MachineInstr &MI,
|
||||
unsigned &AddrSpace) const {
|
||||
bool isStore = false;
|
||||
unsigned TSFlags = (MI.getDesc().TSFlags & NVPTX::isStoreMask) >>
|
||||
NVPTX::isStoreShift;
|
||||
unsigned TSFlags =
|
||||
(MI.getDesc().TSFlags & NVPTX::isStoreMask) >> NVPTX::isStoreShift;
|
||||
isStore = (TSFlags == 1);
|
||||
if (isStore)
|
||||
AddrSpace = getLdStCodeAddrSpace(MI);
|
||||
return isStore;
|
||||
}
|
||||
|
||||
|
||||
bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
|
||||
unsigned addrspace = 0;
|
||||
if (MI->getOpcode() == NVPTX::INT_CUDA_SYNCTHREADS)
|
||||
@ -152,7 +144,6 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/// AnalyzeBranch - Analyze the branching code at the end of MBB, returning
|
||||
/// true if it cannot be understood (e.g. it's a switch dispatch or isn't
|
||||
/// implemented for a target). Upon success, this returns false and returns
|
||||
@ -176,11 +167,9 @@ bool NVPTXInstrInfo::CanTailMerge(const MachineInstr *MI) const {
|
||||
/// Note that RemoveBranch and InsertBranch must be implemented to support
|
||||
/// cases where this method returns success.
|
||||
///
|
||||
bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const {
|
||||
bool NVPTXInstrInfo::AnalyzeBranch(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const {
|
||||
// If the block has no terminators, it just falls into the block after it.
|
||||
MachineBasicBlock::iterator I = MBB.end();
|
||||
if (I == MBB.begin() || !isUnpredicatedTerminator(--I))
|
||||
@ -208,14 +197,13 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
MachineInstr *SecondLastInst = I;
|
||||
|
||||
// If there are three terminators, we don't know what sort of block this is.
|
||||
if (SecondLastInst && I != MBB.begin() &&
|
||||
isUnpredicatedTerminator(--I))
|
||||
if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I))
|
||||
return true;
|
||||
|
||||
// If the block ends with NVPTX::GOTO and NVPTX:CBranch, handle it.
|
||||
if (SecondLastInst->getOpcode() == NVPTX::CBranch &&
|
||||
LastInst->getOpcode() == NVPTX::GOTO) {
|
||||
TBB = SecondLastInst->getOperand(1).getMBB();
|
||||
TBB = SecondLastInst->getOperand(1).getMBB();
|
||||
Cond.push_back(SecondLastInst->getOperand(0));
|
||||
FBB = LastInst->getOperand(0).getMBB();
|
||||
return false;
|
||||
@ -238,7 +226,8 @@ bool NVPTXInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
|
||||
|
||||
unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
|
||||
MachineBasicBlock::iterator I = MBB.end();
|
||||
if (I == MBB.begin()) return 0;
|
||||
if (I == MBB.begin())
|
||||
return 0;
|
||||
--I;
|
||||
if (I->getOpcode() != NVPTX::GOTO && I->getOpcode() != NVPTX::CBranch)
|
||||
return 0;
|
||||
@ -248,7 +237,8 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
|
||||
|
||||
I = MBB.end();
|
||||
|
||||
if (I == MBB.begin()) return 1;
|
||||
if (I == MBB.begin())
|
||||
return 1;
|
||||
--I;
|
||||
if (I->getOpcode() != NVPTX::CBranch)
|
||||
return 1;
|
||||
@ -258,11 +248,9 @@ unsigned NVPTXInstrInfo::RemoveBranch(MachineBasicBlock &MBB) const {
|
||||
return 2;
|
||||
}
|
||||
|
||||
unsigned
|
||||
NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const {
|
||||
unsigned NVPTXInstrInfo::InsertBranch(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const {
|
||||
// Shouldn't be a fall through.
|
||||
assert(TBB && "InsertBranch must not be told to insert a fallthrough");
|
||||
assert((Cond.size() == 1 || Cond.size() == 0) &&
|
||||
@ -270,17 +258,16 @@ NVPTXInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
|
||||
|
||||
// One-way branch.
|
||||
if (FBB == 0) {
|
||||
if (Cond.empty()) // Unconditional branch
|
||||
if (Cond.empty()) // Unconditional branch
|
||||
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(TBB);
|
||||
else // Conditional branch
|
||||
BuildMI(&MBB, DL, get(NVPTX::CBranch))
|
||||
.addReg(Cond[0].getReg()).addMBB(TBB);
|
||||
else // Conditional branch
|
||||
BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg())
|
||||
.addMBB(TBB);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// Two-way Conditional Branch.
|
||||
BuildMI(&MBB, DL, get(NVPTX::CBranch))
|
||||
.addReg(Cond[0].getReg()).addMBB(TBB);
|
||||
BuildMI(&MBB, DL, get(NVPTX::CBranch)).addReg(Cond[0].getReg()).addMBB(TBB);
|
||||
BuildMI(&MBB, DL, get(NVPTX::GOTO)).addMBB(FBB);
|
||||
return 2;
|
||||
}
|
||||
|
@ -23,8 +23,7 @@
|
||||
|
||||
namespace llvm {
|
||||
|
||||
class NVPTXInstrInfo : public NVPTXGenInstrInfo
|
||||
{
|
||||
class NVPTXInstrInfo : public NVPTXGenInstrInfo {
|
||||
NVPTXTargetMachine &TM;
|
||||
const NVPTXRegisterInfo RegInfo;
|
||||
public:
|
||||
@ -50,30 +49,26 @@ public:
|
||||
* const TargetRegisterClass *RC) const;
|
||||
*/
|
||||
|
||||
virtual void copyPhysReg(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg,
|
||||
bool KillSrc) const ;
|
||||
virtual bool isMoveInstr(const MachineInstr &MI,
|
||||
unsigned &SrcReg,
|
||||
virtual void copyPhysReg(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg, bool KillSrc) const;
|
||||
virtual bool isMoveInstr(const MachineInstr &MI, unsigned &SrcReg,
|
||||
unsigned &DestReg) const;
|
||||
bool isLoadInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
|
||||
bool isStoreInstr(const MachineInstr &MI, unsigned &AddrSpace) const;
|
||||
bool isReadSpecialReg(MachineInstr &MI) const;
|
||||
|
||||
virtual bool CanTailMerge(const MachineInstr *MI) const ;
|
||||
virtual bool CanTailMerge(const MachineInstr *MI) const;
|
||||
// Branch analysis.
|
||||
virtual bool AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
|
||||
MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond,
|
||||
bool AllowModify) const;
|
||||
virtual bool AnalyzeBranch(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
|
||||
SmallVectorImpl<MachineOperand> &Cond, bool AllowModify) const;
|
||||
virtual unsigned RemoveBranch(MachineBasicBlock &MBB) const;
|
||||
virtual unsigned InsertBranch(MachineBasicBlock &MBB,MachineBasicBlock *TBB,
|
||||
MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond,
|
||||
DebugLoc DL) const;
|
||||
virtual unsigned InsertBranch(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock *TBB, MachineBasicBlock *FBB,
|
||||
const SmallVectorImpl<MachineOperand> &Cond, DebugLoc DL) const;
|
||||
unsigned getLdStCodeAddrSpace(const MachineInstr &MI) const {
|
||||
return MI.getOperand(2).getImm();
|
||||
return MI.getOperand(2).getImm();
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -25,18 +25,15 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createLowerAggrCopies();
|
||||
}
|
||||
namespace llvm { FunctionPass *createLowerAggrCopies(); }
|
||||
|
||||
char NVPTXLowerAggrCopies::ID = 0;
|
||||
|
||||
// Lower MemTransferInst or load-store pair to loop
|
||||
static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
|
||||
Value *dstAddr, Value *len,
|
||||
//unsigned numLoads,
|
||||
bool srcVolatile, bool dstVolatile,
|
||||
LLVMContext &Context, Function &F) {
|
||||
static void convertTransferToLoop(
|
||||
Instruction *splitAt, Value *srcAddr, Value *dstAddr, Value *len,
|
||||
//unsigned numLoads,
|
||||
bool srcVolatile, bool dstVolatile, LLVMContext &Context, Function &F) {
|
||||
Type *indType = len->getType();
|
||||
|
||||
BasicBlock *origBB = splitAt->getParent();
|
||||
@ -48,10 +45,8 @@ static void convertTransferToLoop(Instruction *splitAt, Value *srcAddr,
|
||||
|
||||
// srcAddr and dstAddr are expected to be pointer types,
|
||||
// so no check is made here.
|
||||
unsigned srcAS =
|
||||
dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
|
||||
unsigned dstAS =
|
||||
dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
|
||||
unsigned srcAS = dyn_cast<PointerType>(srcAddr->getType())->getAddressSpace();
|
||||
unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
|
||||
|
||||
// Cast pointers to (char *)
|
||||
srcAddr = builder.CreateBitCast(srcAddr, Type::getInt8PtrTy(Context, srcAS));
|
||||
@ -86,12 +81,11 @@ static void convertMemSetToLoop(Instruction *splitAt, Value *dstAddr,
|
||||
origBB->getTerminator()->setSuccessor(0, loopBB);
|
||||
IRBuilder<> builder(origBB, origBB->getTerminator());
|
||||
|
||||
unsigned dstAS =
|
||||
dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
|
||||
unsigned dstAS = dyn_cast<PointerType>(dstAddr->getType())->getAddressSpace();
|
||||
|
||||
// Cast pointer to the type of value getting stored
|
||||
dstAddr = builder.CreateBitCast(dstAddr,
|
||||
PointerType::get(val->getType(), dstAS));
|
||||
dstAddr =
|
||||
builder.CreateBitCast(dstAddr, PointerType::get(val->getType(), dstAS));
|
||||
|
||||
IRBuilder<> loop(loopBB);
|
||||
PHINode *ind = loop.CreatePHI(len->getType(), 0);
|
||||
@ -120,24 +114,26 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
|
||||
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) {
|
||||
//BasicBlock *bb = BI;
|
||||
for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE;
|
||||
++II) {
|
||||
if (LoadInst * load = dyn_cast<LoadInst>(II)) {
|
||||
++II) {
|
||||
if (LoadInst *load = dyn_cast<LoadInst>(II)) {
|
||||
|
||||
if (load->hasOneUse() == false) continue;
|
||||
if (load->hasOneUse() == false)
|
||||
continue;
|
||||
|
||||
if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize) continue;
|
||||
if (TD->getTypeStoreSize(load->getType()) < MaxAggrCopySize)
|
||||
continue;
|
||||
|
||||
User *use = *(load->use_begin());
|
||||
if (StoreInst * store = dyn_cast<StoreInst>(use)) {
|
||||
if (StoreInst *store = dyn_cast<StoreInst>(use)) {
|
||||
if (store->getOperand(0) != load) //getValueOperand
|
||||
continue;
|
||||
continue;
|
||||
aggrLoads.push_back(load);
|
||||
}
|
||||
} else if (MemTransferInst * intr = dyn_cast<MemTransferInst>(II)) {
|
||||
} else if (MemTransferInst *intr = dyn_cast<MemTransferInst>(II)) {
|
||||
Value *len = intr->getLength();
|
||||
// If the number of elements being copied is greater
|
||||
// than MaxAggrCopySize, lower it to a loop
|
||||
if (ConstantInt * len_int = dyn_cast < ConstantInt > (len)) {
|
||||
if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
|
||||
if (len_int->getZExtValue() >= MaxAggrCopySize) {
|
||||
aggrMemcpys.push_back(intr);
|
||||
}
|
||||
@ -145,9 +141,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
|
||||
// turn variable length memcpy/memmov into loop
|
||||
aggrMemcpys.push_back(intr);
|
||||
}
|
||||
} else if (MemSetInst * memsetintr = dyn_cast<MemSetInst>(II)) {
|
||||
} else if (MemSetInst *memsetintr = dyn_cast<MemSetInst>(II)) {
|
||||
Value *len = memsetintr->getLength();
|
||||
if (ConstantInt * len_int = dyn_cast<ConstantInt>(len)) {
|
||||
if (ConstantInt *len_int = dyn_cast<ConstantInt>(len)) {
|
||||
if (len_int->getZExtValue() >= MaxAggrCopySize) {
|
||||
aggrMemsets.push_back(memsetintr);
|
||||
}
|
||||
@ -158,8 +154,9 @@ bool NVPTXLowerAggrCopies::runOnFunction(Function &F) {
|
||||
}
|
||||
}
|
||||
}
|
||||
if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0)
|
||||
&& (aggrMemsets.size() == 0)) return false;
|
||||
if ((aggrLoads.size() == 0) && (aggrMemcpys.size() == 0) &&
|
||||
(aggrMemsets.size() == 0))
|
||||
return false;
|
||||
|
||||
//
|
||||
// Do the transformation of an aggr load/copy/set to a loop
|
||||
|
@ -11,10 +11,6 @@
|
||||
#ifndef NVPTX_NUM_REGISTERS_H
|
||||
#define NVPTX_NUM_REGISTERS_H
|
||||
|
||||
namespace llvm {
|
||||
|
||||
const unsigned NVPTXNumRegisters = 396;
|
||||
|
||||
}
|
||||
namespace llvm { const unsigned NVPTXNumRegisters = 396; }
|
||||
|
||||
#endif
|
||||
|
@ -23,69 +23,54 @@
|
||||
#include "llvm/MC/MachineLocation.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm
|
||||
{
|
||||
std::string getNVPTXRegClassName (TargetRegisterClass const *RC) {
|
||||
namespace llvm {
|
||||
std::string getNVPTXRegClassName(TargetRegisterClass const *RC) {
|
||||
if (RC == &NVPTX::Float32RegsRegClass) {
|
||||
return ".f32";
|
||||
}
|
||||
if (RC == &NVPTX::Float64RegsRegClass) {
|
||||
return ".f64";
|
||||
}
|
||||
else if (RC == &NVPTX::Int64RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int64RegsRegClass) {
|
||||
return ".s64";
|
||||
}
|
||||
else if (RC == &NVPTX::Int32RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int32RegsRegClass) {
|
||||
return ".s32";
|
||||
}
|
||||
else if (RC == &NVPTX::Int16RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int16RegsRegClass) {
|
||||
return ".s16";
|
||||
}
|
||||
// Int8Regs become 16-bit registers in PTX
|
||||
else if (RC == &NVPTX::Int8RegsRegClass) {
|
||||
// Int8Regs become 16-bit registers in PTX
|
||||
else if (RC == &NVPTX::Int8RegsRegClass) {
|
||||
return ".s16";
|
||||
}
|
||||
else if (RC == &NVPTX::Int1RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int1RegsRegClass) {
|
||||
return ".pred";
|
||||
}
|
||||
else if (RC == &NVPTX::SpecialRegsRegClass) {
|
||||
} else if (RC == &NVPTX::SpecialRegsRegClass) {
|
||||
return "!Special!";
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return "INTERNAL";
|
||||
}
|
||||
return "";
|
||||
}
|
||||
|
||||
std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
|
||||
std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
|
||||
if (RC == &NVPTX::Float32RegsRegClass) {
|
||||
return "%f";
|
||||
}
|
||||
if (RC == &NVPTX::Float64RegsRegClass) {
|
||||
return "%fd";
|
||||
}
|
||||
else if (RC == &NVPTX::Int64RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int64RegsRegClass) {
|
||||
return "%rd";
|
||||
}
|
||||
else if (RC == &NVPTX::Int32RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int32RegsRegClass) {
|
||||
return "%r";
|
||||
}
|
||||
else if (RC == &NVPTX::Int16RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int16RegsRegClass) {
|
||||
return "%rs";
|
||||
}
|
||||
else if (RC == &NVPTX::Int8RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int8RegsRegClass) {
|
||||
return "%rc";
|
||||
}
|
||||
else if (RC == &NVPTX::Int1RegsRegClass) {
|
||||
} else if (RC == &NVPTX::Int1RegsRegClass) {
|
||||
return "%p";
|
||||
}
|
||||
else if (RC == &NVPTX::SpecialRegsRegClass) {
|
||||
} else if (RC == &NVPTX::SpecialRegsRegClass) {
|
||||
return "!Special!";
|
||||
}
|
||||
else {
|
||||
} else {
|
||||
return "INTERNAL";
|
||||
}
|
||||
return "";
|
||||
@ -94,23 +79,22 @@ std::string getNVPTXRegClassStr (TargetRegisterClass const *RC) {
|
||||
|
||||
NVPTXRegisterInfo::NVPTXRegisterInfo(const TargetInstrInfo &tii,
|
||||
const NVPTXSubtarget &st)
|
||||
: NVPTXGenRegisterInfo(0),
|
||||
Is64Bit(st.is64Bit()) {}
|
||||
: NVPTXGenRegisterInfo(0), Is64Bit(st.is64Bit()) {}
|
||||
|
||||
#define GET_REGINFO_TARGET_DESC
|
||||
#include "NVPTXGenRegisterInfo.inc"
|
||||
|
||||
/// NVPTX Callee Saved Registers
|
||||
const uint16_t* NVPTXRegisterInfo::
|
||||
getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
const uint16_t *
|
||||
NVPTXRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
static const uint16_t CalleeSavedRegs[] = { 0 };
|
||||
return CalleeSavedRegs;
|
||||
}
|
||||
|
||||
// NVPTX Callee Saved Reg Classes
|
||||
const TargetRegisterClass* const*
|
||||
const TargetRegisterClass *const *
|
||||
NVPTXRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
|
||||
static const TargetRegisterClass * const CalleeSavedRegClasses[] = { 0 };
|
||||
static const TargetRegisterClass *const CalleeSavedRegClasses[] = { 0 };
|
||||
return CalleeSavedRegClasses;
|
||||
}
|
||||
|
||||
@ -119,10 +103,9 @@ BitVector NVPTXRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
|
||||
return Reserved;
|
||||
}
|
||||
|
||||
void NVPTXRegisterInfo::
|
||||
eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS) const {
|
||||
void NVPTXRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS) const {
|
||||
assert(SPAdj == 0 && "Unexpected");
|
||||
|
||||
MachineInstr &MI = *II;
|
||||
@ -130,15 +113,14 @@ eliminateFrameIndex(MachineBasicBlock::iterator II,
|
||||
|
||||
MachineFunction &MF = *MI.getParent()->getParent();
|
||||
int Offset = MF.getFrameInfo()->getObjectOffset(FrameIndex) +
|
||||
MI.getOperand(FIOperandNum+1).getImm();
|
||||
MI.getOperand(FIOperandNum + 1).getImm();
|
||||
|
||||
// Using I0 as the frame pointer
|
||||
MI.getOperand(FIOperandNum).ChangeToRegister(NVPTX::VRFrame, false);
|
||||
MI.getOperand(FIOperandNum+1).ChangeToImmediate(Offset);
|
||||
MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset);
|
||||
}
|
||||
|
||||
int NVPTXRegisterInfo::
|
||||
getDwarfRegNum(unsigned RegNum, bool isEH) const {
|
||||
int NVPTXRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
@ -146,7 +128,4 @@ unsigned NVPTXRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
|
||||
return NVPTX::VRFrame;
|
||||
}
|
||||
|
||||
unsigned NVPTXRegisterInfo::getRARegister() const {
|
||||
return 0;
|
||||
}
|
||||
|
||||
unsigned NVPTXRegisterInfo::getRARegister() const { return 0; }
|
||||
|
@ -17,7 +17,6 @@
|
||||
#include "ManagedStringPool.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
|
||||
|
||||
#define GET_REGINFO_HEADER
|
||||
#include "NVPTXGenRegisterInfo.inc"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
@ -33,30 +32,28 @@ class NVPTXRegisterInfo : public NVPTXGenRegisterInfo {
|
||||
private:
|
||||
bool Is64Bit;
|
||||
// Hold Strings that can be free'd all together with NVPTXRegisterInfo
|
||||
ManagedStringPool ManagedStrPool;
|
||||
ManagedStringPool ManagedStrPool;
|
||||
|
||||
public:
|
||||
NVPTXRegisterInfo(const TargetInstrInfo &tii,
|
||||
const NVPTXSubtarget &st);
|
||||
|
||||
NVPTXRegisterInfo(const TargetInstrInfo &tii, const NVPTXSubtarget &st);
|
||||
|
||||
//------------------------------------------------------
|
||||
// Pure virtual functions from TargetRegisterInfo
|
||||
//------------------------------------------------------
|
||||
|
||||
// NVPTX callee saved registers
|
||||
virtual const uint16_t*
|
||||
virtual const uint16_t *
|
||||
getCalleeSavedRegs(const MachineFunction *MF = 0) const;
|
||||
|
||||
// NVPTX callee saved register classes
|
||||
virtual const TargetRegisterClass* const *
|
||||
virtual const TargetRegisterClass *const *
|
||||
getCalleeSavedRegClasses(const MachineFunction *MF) const;
|
||||
|
||||
virtual BitVector getReservedRegs(const MachineFunction &MF) const;
|
||||
|
||||
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI,
|
||||
int SPAdj, unsigned FIOperandNum,
|
||||
RegScavenger *RS=NULL) const;
|
||||
virtual void eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj,
|
||||
unsigned FIOperandNum,
|
||||
RegScavenger *RS = NULL) const;
|
||||
|
||||
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
|
||||
virtual unsigned getFrameRegister(const MachineFunction &MF) const;
|
||||
@ -74,11 +71,9 @@ public:
|
||||
|
||||
};
|
||||
|
||||
|
||||
std::string getNVPTXRegClassName (const TargetRegisterClass *RC);
|
||||
std::string getNVPTXRegClassStr (const TargetRegisterClass *RC);
|
||||
std::string getNVPTXRegClassName(const TargetRegisterClass *RC);
|
||||
std::string getNVPTXRegClassStr(const TargetRegisterClass *RC);
|
||||
|
||||
} // end namespace llvm
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -21,9 +21,7 @@
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace llvm {
|
||||
FunctionPass *createSplitBBatBarPass();
|
||||
}
|
||||
namespace llvm { FunctionPass *createSplitBBatBarPass(); }
|
||||
|
||||
char NVPTXSplitBBatBar::ID = 0;
|
||||
|
||||
@ -72,6 +70,4 @@ bool NVPTXSplitBBatBar::runOnFunction(Function &F) {
|
||||
// This interface will most likely not be necessary, because this pass will
|
||||
// not be invoked by the driver, but will be used as a prerequisite to
|
||||
// another pass.
|
||||
FunctionPass *llvm::createSplitBBatBarPass() {
|
||||
return new NVPTXSplitBBatBar();
|
||||
}
|
||||
FunctionPass *llvm::createSplitBBatBarPass() { return new NVPTXSplitBBatBar(); }
|
||||
|
@ -22,22 +22,18 @@ using namespace llvm;
|
||||
// Select Driver Interface
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
namespace {
|
||||
cl::opt<NVPTX::DrvInterface>
|
||||
DriverInterface(cl::desc("Choose driver interface:"),
|
||||
cl::values(
|
||||
clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
|
||||
clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
|
||||
clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"),
|
||||
clEnumValEnd),
|
||||
cl::init(NVPTX::NVCL));
|
||||
cl::opt<NVPTX::DrvInterface> DriverInterface(
|
||||
cl::desc("Choose driver interface:"),
|
||||
cl::values(clEnumValN(NVPTX::NVCL, "drvnvcl", "Nvidia OpenCL driver"),
|
||||
clEnumValN(NVPTX::CUDA, "drvcuda", "Nvidia CUDA driver"),
|
||||
clEnumValN(NVPTX::TEST, "drvtest", "Plain Test"), clEnumValEnd),
|
||||
cl::init(NVPTX::NVCL));
|
||||
}
|
||||
|
||||
NVPTXSubtarget::NVPTXSubtarget(const std::string &TT, const std::string &CPU,
|
||||
const std::string &FS, bool is64Bit)
|
||||
: NVPTXGenSubtargetInfo(TT, CPU, FS),
|
||||
Is64Bit(is64Bit),
|
||||
PTXVersion(0),
|
||||
SmVersion(10) {
|
||||
: NVPTXGenSubtargetInfo(TT, CPU, FS), Is64Bit(is64Bit), PTXVersion(0),
|
||||
SmVersion(10) {
|
||||
|
||||
drvInterface = DriverInterface;
|
||||
|
||||
|
@ -25,7 +25,7 @@
|
||||
namespace llvm {
|
||||
|
||||
class NVPTXSubtarget : public NVPTXGenSubtargetInfo {
|
||||
|
||||
|
||||
std::string TargetName;
|
||||
NVPTX::DrvInterface drvInterface;
|
||||
bool Is64Bit;
|
||||
@ -61,13 +61,10 @@ public:
|
||||
bool hasLDU() const { return SmVersion >= 20; }
|
||||
bool hasGenericLdSt() const { return SmVersion >= 20; }
|
||||
inline bool hasHWROT32() const { return false; }
|
||||
inline bool hasSWROT32() const {
|
||||
return true;
|
||||
}
|
||||
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32() ; }
|
||||
inline bool hasSWROT32() const { return true; }
|
||||
inline bool hasROT32() const { return hasHWROT32() || hasSWROT32(); }
|
||||
inline bool hasROT64() const { return SmVersion >= 20; }
|
||||
|
||||
|
||||
bool is64Bit() const { return Is64Bit; }
|
||||
|
||||
unsigned int getSmVersion() const { return SmVersion; }
|
||||
@ -96,4 +93,4 @@ public:
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
#endif // NVPTXSUBTARGET_H
|
||||
#endif // NVPTXSUBTARGET_H
|
||||
|
@ -45,10 +45,8 @@
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include "llvm/Transforms/Scalar.h"
|
||||
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
|
||||
extern "C" void LLVMInitializeNVPTXTarget() {
|
||||
// Register the target.
|
||||
RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
|
||||
@ -59,50 +57,37 @@ extern "C" void LLVMInitializeNVPTXTarget() {
|
||||
|
||||
}
|
||||
|
||||
NVPTXTargetMachine::NVPTXTargetMachine(const Target &T,
|
||||
StringRef TT,
|
||||
StringRef CPU,
|
||||
StringRef FS,
|
||||
const TargetOptions& Options,
|
||||
Reloc::Model RM,
|
||||
CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL,
|
||||
bool is64bit)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, CPU, FS, is64bit),
|
||||
DL(Subtarget.getDataLayout()),
|
||||
InstrInfo(*this), TLInfo(*this), TSInfo(*this), FrameLowering(*this,is64bit)
|
||||
/*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {
|
||||
}
|
||||
|
||||
|
||||
NVPTXTargetMachine::NVPTXTargetMachine(
|
||||
const Target &T, StringRef TT, StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL, bool is64bit)
|
||||
: LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
|
||||
Subtarget(TT, CPU, FS, is64bit), DL(Subtarget.getDataLayout()),
|
||||
InstrInfo(*this), TLInfo(*this), TSInfo(*this),
|
||||
FrameLowering(
|
||||
*this, is64bit) /*FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0)*/ {}
|
||||
|
||||
void NVPTXTargetMachine32::anchor() {}
|
||||
|
||||
NVPTXTargetMachine32::NVPTXTargetMachine32(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {
|
||||
}
|
||||
NVPTXTargetMachine32::NVPTXTargetMachine32(
|
||||
const Target &T, StringRef TT, StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
|
||||
|
||||
void NVPTXTargetMachine64::anchor() {}
|
||||
|
||||
NVPTXTargetMachine64::NVPTXTargetMachine64(const Target &T, StringRef TT,
|
||||
StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options,
|
||||
Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {
|
||||
}
|
||||
|
||||
NVPTXTargetMachine64::NVPTXTargetMachine64(
|
||||
const Target &T, StringRef TT, StringRef CPU, StringRef FS,
|
||||
const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
|
||||
CodeGenOpt::Level OL)
|
||||
: NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
|
||||
|
||||
namespace llvm {
|
||||
class NVPTXPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
|
||||
NVPTXTargetMachine &getNVPTXTargetMachine() const {
|
||||
return getTM<NVPTXTargetMachine>();
|
||||
@ -126,6 +111,4 @@ bool NVPTXPassConfig::addInstSelector() {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool NVPTXPassConfig::addPreRegAlloc() {
|
||||
return false;
|
||||
}
|
||||
bool NVPTXPassConfig::addPreRegAlloc() { return false; }
|
||||
|
@ -11,7 +11,6 @@
//
//===----------------------------------------------------------------------===//


#ifndef NVPTX_TARGETMACHINE_H
#define NVPTX_TARGETMACHINE_H

@ -31,42 +30,40 @@ namespace llvm {
/// NVPTXTargetMachine
///
class NVPTXTargetMachine : public LLVMTargetMachine {
  NVPTXSubtarget Subtarget;
  const DataLayout DL; // Calculates type size & alignment
  NVPTXInstrInfo InstrInfo;
  NVPTXTargetLowering TLInfo;
  TargetSelectionDAGInfo TSInfo;
  NVPTXSubtarget Subtarget;
  const DataLayout DL; // Calculates type size & alignment
  NVPTXInstrInfo InstrInfo;
  NVPTXTargetLowering TLInfo;
  TargetSelectionDAGInfo TSInfo;

  // NVPTX does not have any call stack frame, but need a NVPTX specific
  // FrameLowering class because TargetFrameLowering is abstract.
  NVPTXFrameLowering FrameLowering;
  NVPTXFrameLowering FrameLowering;

  // Hold Strings that can be free'd all together with NVPTXTargetMachine
  ManagedStringPool ManagedStrPool;
  ManagedStringPool ManagedStrPool;

  //bool addCommonCodeGenPasses(PassManagerBase &, CodeGenOpt::Level,
  //                            bool DisableVerify, MCContext *&OutCtx);

public:
  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU,
                     StringRef FS, const TargetOptions &Options,
                     Reloc::Model RM, CodeModel::Model CM,
                     CodeGenOpt::Level OP,
                     bool is64bit);
  NVPTXTargetMachine(const Target &T, StringRef TT, StringRef CPU, StringRef FS,
                     const TargetOptions &Options, Reloc::Model RM,
                     CodeModel::Model CM, CodeGenOpt::Level OP, bool is64bit);

  virtual const TargetFrameLowering *getFrameLowering() const {
    return &FrameLowering;
  }
  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
  virtual const DataLayout *getDataLayout() const { return &DL;}
  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget;}
  virtual const NVPTXInstrInfo *getInstrInfo() const { return &InstrInfo; }
  virtual const DataLayout *getDataLayout() const { return &DL; }
  virtual const NVPTXSubtarget *getSubtargetImpl() const { return &Subtarget; }

  virtual const NVPTXRegisterInfo *getRegisterInfo() const {
    return &(InstrInfo.getRegisterInfo());
  }

  virtual NVPTXTargetLowering *getTargetLowering() const {
    return const_cast<NVPTXTargetLowering*>(&TLInfo);
    return const_cast<NVPTXTargetLowering *>(&TLInfo);
  }

  virtual const TargetSelectionDAGInfo *getSelectionDAGInfo() const {
@ -79,22 +76,19 @@ public:
  //virtual bool addPreRegAlloc(PassManagerBase &, CodeGenOpt::Level);

  ManagedStringPool *getManagedStrPool() const {
    return const_cast<ManagedStringPool*>(&ManagedStrPool);
    return const_cast<ManagedStringPool *>(&ManagedStrPool);
  }

  virtual TargetPassConfig *createPassConfig(PassManagerBase &PM);

  // Emission of machine code through JITCodeEmitter is not supported.
  virtual bool addPassesToEmitMachineCode(PassManagerBase &,
                                          JITCodeEmitter &,
  virtual bool addPassesToEmitMachineCode(PassManagerBase &, JITCodeEmitter &,
                                          bool = true) {
    return true;
  }

  // Emission of machine code through MCJIT is not supported.
  virtual bool addPassesToEmitMC(PassManagerBase &,
                                 MCContext *&,
                                 raw_ostream &,
  virtual bool addPassesToEmitMC(PassManagerBase &, MCContext *&, raw_ostream &,
                                 bool = true) {
    return true;
  }
@ -119,7 +113,6 @@ public:
                       CodeGenOpt::Level OL);
};


} // end namespace llvm

#endif
@ -46,45 +46,43 @@ public:
  }

  virtual void Initialize(MCContext &ctx, const TargetMachine &TM) {
    TextSection = new NVPTXSection(MCSection::SV_ELF,
                                   SectionKind::getText());
    DataSection = new NVPTXSection(MCSection::SV_ELF,
                                   SectionKind::getDataRel());
    BSSSection = new NVPTXSection(MCSection::SV_ELF,
                                  SectionKind::getBSS());
    ReadOnlySection = new NVPTXSection(MCSection::SV_ELF,
                                       SectionKind::getReadOnly());
    TextSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getText());
    DataSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getDataRel());
    BSSSection = new NVPTXSection(MCSection::SV_ELF, SectionKind::getBSS());
    ReadOnlySection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getReadOnly());

    StaticCtorSection = new NVPTXSection(MCSection::SV_ELF,
                                         SectionKind::getMetadata());
    StaticDtorSection = new NVPTXSection(MCSection::SV_ELF,
                                         SectionKind::getMetadata());
    LSDASection = new NVPTXSection(MCSection::SV_ELF,
                                   SectionKind::getMetadata());
    EHFrameSection = new NVPTXSection(MCSection::SV_ELF,
                                      SectionKind::getMetadata());
    DwarfAbbrevSection = new NVPTXSection(MCSection::SV_ELF,
                                          SectionKind::getMetadata());
    DwarfInfoSection = new NVPTXSection(MCSection::SV_ELF,
                                        SectionKind::getMetadata());
    DwarfLineSection = new NVPTXSection(MCSection::SV_ELF,
                                        SectionKind::getMetadata());
    DwarfFrameSection = new NVPTXSection(MCSection::SV_ELF,
                                         SectionKind::getMetadata());
    DwarfPubTypesSection = new NVPTXSection(MCSection::SV_ELF,
                                            SectionKind::getMetadata());
    DwarfDebugInlineSection = new NVPTXSection(MCSection::SV_ELF,
                                               SectionKind::getMetadata());
    DwarfStrSection = new NVPTXSection(MCSection::SV_ELF,
                                       SectionKind::getMetadata());
    DwarfLocSection = new NVPTXSection(MCSection::SV_ELF,
                                       SectionKind::getMetadata());
    DwarfARangesSection = new NVPTXSection(MCSection::SV_ELF,
                                           SectionKind::getMetadata());
    DwarfRangesSection = new NVPTXSection(MCSection::SV_ELF,
                                          SectionKind::getMetadata());
    DwarfMacroInfoSection = new NVPTXSection(MCSection::SV_ELF,
                                             SectionKind::getMetadata());
    StaticCtorSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    StaticDtorSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    LSDASection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    EHFrameSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfAbbrevSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfInfoSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfLineSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfFrameSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfPubTypesSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfDebugInlineSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfStrSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfLocSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfARangesSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfRangesSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
    DwarfMacroInfoSection =
        new NVPTXSection(MCSection::SV_ELF, SectionKind::getMetadata());
  }

  virtual const MCSection *getSectionForConstant(SectionKind Kind) const {
@ -93,8 +91,7 @@ public:

  virtual const MCSection *
  getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
                           Mangler *Mang,
                           const TargetMachine &TM) const {
                           Mangler *Mang, const TargetMachine &TM) const {
    return DataSection;
  }

@ -34,7 +34,6 @@ typedef std::map<const Module *, global_val_annot_t> per_module_annot_t;

ManagedStatic<per_module_annot_t> annotationCache;


static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
  assert(md && "Invalid mdnode for annotation");
  assert((md->getNumOperands() % 2) == 1 && "Invalid number of operands");
@ -46,7 +45,7 @@ static void cacheAnnotationFromMD(const MDNode *md, key_val_pair_t &retval) {
    assert(prop && "Annotation property not a string");

    // value
    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i+1));
    ConstantInt *Val = dyn_cast<ConstantInt>(md->getOperand(i + 1));
    assert(Val && "Value operand not a constant int");

    std::string keyname = prop->getString().str();
@ -120,9 +119,9 @@ bool llvm::findAllNVVMAnnotation(const GlobalValue *gv, std::string prop,
bool llvm::isTexture(const llvm::Value &val) {
  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
    unsigned annot;
    if (llvm::findOneNVVMAnnotation(gv,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
                                    annot)) {
    if (llvm::findOneNVVMAnnotation(
            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISTEXTURE],
            annot)) {
      assert((annot == 1) && "Unexpected annotation on a texture symbol");
      return true;
    }
@ -133,9 +132,9 @@ bool llvm::isTexture(const llvm::Value &val) {
bool llvm::isSurface(const llvm::Value &val) {
  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
    unsigned annot;
    if (llvm::findOneNVVMAnnotation(gv,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
                                    annot)) {
    if (llvm::findOneNVVMAnnotation(
            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSURFACE],
            annot)) {
      assert((annot == 1) && "Unexpected annotation on a surface symbol");
      return true;
    }
@ -146,9 +145,9 @@ bool llvm::isSurface(const llvm::Value &val) {
bool llvm::isSampler(const llvm::Value &val) {
  if (const GlobalValue *gv = dyn_cast<GlobalValue>(&val)) {
    unsigned annot;
    if (llvm::findOneNVVMAnnotation(gv,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
                                    annot)) {
    if (llvm::findOneNVVMAnnotation(
            gv, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
            annot)) {
      assert((annot == 1) && "Unexpected annotation on a sampler symbol");
      return true;
    }
@ -156,9 +155,9 @@ bool llvm::isSampler(const llvm::Value &val) {
  if (const Argument *arg = dyn_cast<Argument>(&val)) {
    const Function *func = arg->getParent();
    std::vector<unsigned> annot;
    if (llvm::findAllNVVMAnnotation(func,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
                                    annot)) {
    if (llvm::findAllNVVMAnnotation(
            func, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISSAMPLER],
            annot)) {
      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
        return true;
    }
@ -171,8 +170,9 @@ bool llvm::isImageReadOnly(const llvm::Value &val) {
    const Function *func = arg->getParent();
    std::vector<unsigned> annot;
    if (llvm::findAllNVVMAnnotation(func,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
                                    annot)) {
            llvm::PropertyAnnotationNames[
                llvm::PROPERTY_ISREADONLY_IMAGE_PARAM],
            annot)) {
      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
        return true;
    }
@ -185,8 +185,9 @@ bool llvm::isImageWriteOnly(const llvm::Value &val) {
    const Function *func = arg->getParent();
    std::vector<unsigned> annot;
    if (llvm::findAllNVVMAnnotation(func,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
                                    annot)) {
            llvm::PropertyAnnotationNames[
                llvm::PROPERTY_ISWRITEONLY_IMAGE_PARAM],
            annot)) {
      if (std::find(annot.begin(), annot.end(), arg->getArgNo()) != annot.end())
        return true;
    }
@ -214,52 +215,44 @@ std::string llvm::getSamplerName(const llvm::Value &val) {
}

bool llvm::getMaxNTIDx(const Function &F, unsigned &x) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X],
                                      x));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_X], x));
}

bool llvm::getMaxNTIDy(const Function &F, unsigned &y) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y],
                                      y));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Y], y));
}

bool llvm::getMaxNTIDz(const Function &F, unsigned &z) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z],
                                      z));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MAXNTID_Z], z));
}

bool llvm::getReqNTIDx(const Function &F, unsigned &x) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X],
                                      x));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_X], x));
}

bool llvm::getReqNTIDy(const Function &F, unsigned &y) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y],
                                      y));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Y], y));
}

bool llvm::getReqNTIDz(const Function &F, unsigned &z) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z],
                                      z));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_REQNTID_Z], z));
}

bool llvm::getMinCTASm(const Function &F, unsigned &x) {
  return (llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM],
                                      x));
  return (llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_MINNCTAPERSM], x));
}

bool llvm::isKernelFunction(const Function &F) {
  unsigned x = 0;
  bool retval = llvm::findOneNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION],
                                            x);
  bool retval = llvm::findOneNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ISKERNEL_FUNCTION], x);
  if (retval == false) {
    // There is no NVVM metadata, check the calling convention
    if (F.getCallingConv() == llvm::CallingConv::PTX_Kernel)
@ -267,20 +260,19 @@ bool llvm::isKernelFunction(const Function &F) {
    else
      return false;
  }
  return (x==1);
  return (x == 1);
}
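For orientation, a quick sketch of how a pass might consume the query helpers above. Only the functions visible in this file are used; the wrapper function name is made up for illustration:

// Sketch: query the NVVM annotation metadata attached to a function.
static void checkLaunchLimits(const llvm::Function &F) {
  if (!llvm::isKernelFunction(F))
    return; // only kernels are expected to carry launch limits
  unsigned MaxX = 0;
  if (llvm::getMaxNTIDx(F, MaxX)) {
    // MaxX now holds the PROPERTY_MAXNTID_X value for this kernel
  }
  unsigned MinCTA = 0;
  if (llvm::getMinCTASm(F, MinCTA)) {
    // MinCTA holds the PROPERTY_MINNCTAPERSM value, when present
  }
}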

bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {
  std::vector<unsigned> Vs;
  bool retval = llvm::findAllNVVMAnnotation(&F,
                llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN],
                                            Vs);
  bool retval = llvm::findAllNVVMAnnotation(
      &F, llvm::PropertyAnnotationNames[llvm::PROPERTY_ALIGN], Vs);
  if (retval == false)
    return false;
  for (int i=0, e=Vs.size(); i<e; i++) {
  for (int i = 0, e = Vs.size(); i < e; i++) {
    unsigned v = Vs[i];
    if ( (v >> 16) == index ) {
      align = v & 0xFFFF;
    if ((v >> 16) == index) {
      align = v & 0xFFFF;
      return true;
    }
  }
@ -289,16 +281,15 @@ bool llvm::getAlign(const Function &F, unsigned index, unsigned &align) {

bool llvm::getAlign(const CallInst &I, unsigned index, unsigned &align) {
  if (MDNode *alignNode = I.getMetadata("callalign")) {
    for (int i=0, n = alignNode->getNumOperands();
         i<n; i++) {
    for (int i = 0, n = alignNode->getNumOperands(); i < n; i++) {
      if (const ConstantInt *CI =
              dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
          dyn_cast<ConstantInt>(alignNode->getOperand(i))) {
        unsigned v = CI->getZExtValue();
        if ( (v>>16) == index ) {
        if ((v >> 16) == index) {
          align = v & 0xFFFF;
          return true;
        }
        if ( (v>>16) > index ) {
        if ((v >> 16) > index) {
          return false;
        }
      }
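Both getAlign overloads above decode a packed 32-bit word: the upper 16 bits carry the parameter index and the lower 16 bits the alignment. A minimal sketch of that packing convention, inferred from the shifts and masks in the code (the helper names are made up):

// Sketch of the (index, align) packing the code above decodes.
static unsigned packIndexAlign(unsigned Index, unsigned Align) {
  return (Index << 16) | (Align & 0xFFFF); // index in the high half, align low
}
static void unpackIndexAlign(unsigned V, unsigned &Index, unsigned &Align) {
  Index = V >> 16;
  Align = V & 0xFFFF;
}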

@ -337,8 +328,8 @@ bool llvm::isMemorySpaceTransferIntrinsic(Intrinsic::ID id) {
// consider several special intrinsics in striping pointer casts, and
// provide an option to ignore GEP indicies for find out the base address only
// which could be used in simple alias disambigurate.
const Value *llvm::skipPointerTransfer(const Value *V,
                                       bool ignore_GEP_indices) {
const Value *
llvm::skipPointerTransfer(const Value *V, bool ignore_GEP_indices) {
  V = V->stripPointerCasts();
  while (true) {
    if (const IntrinsicInst *IS = dyn_cast<IntrinsicInst>(V)) {
@ -360,8 +351,8 @@ const Value *llvm::skipPointerTransfer(const Value *V,
// - ignore GEP indicies for find out the base address only, and
// - tracking PHINode
// which could be used in simple alias disambigurate.
const Value *llvm::skipPointerTransfer(const Value *V,
                                       std::set<const Value *> &processed) {
const Value *
llvm::skipPointerTransfer(const Value *V, std::set<const Value *> &processed) {
  if (processed.find(V) != processed.end())
    return NULL;
  processed.insert(V);
@ -406,7 +397,6 @@ const Value *llvm::skipPointerTransfer(const Value *V,
  return V;
}


// The following are some useful utilities for debuggung

BasicBlock *llvm::getParentBlock(Value *v) {
@ -23,8 +23,7 @@
#include <string>
#include <vector>

namespace llvm
{
namespace llvm {

#define NVCL_IMAGE2D_READONLY_FUNCNAME "__is_image2D_readonly"
#define NVCL_IMAGE3D_READONLY_FUNCNAME "__is_image3D_readonly"
@ -64,8 +63,7 @@ bool isBarrierIntrinsic(llvm::Intrinsic::ID);
/// to pass into type construction of CallInst ctors. This turns a null
/// terminated list of pointers (or other value types) into a real live vector.
///
template<typename T>
inline std::vector<T> make_vector(T A, ...) {
template <typename T> inline std::vector<T> make_vector(T A, ...) {
  va_list Args;
  va_start(Args, A);
  std::vector<T> Result;
@ -78,8 +76,8 @@ inline std::vector<T> make_vector(T A, ...) {

bool isMemorySpaceTransferIntrinsic(Intrinsic::ID id);
const Value *skipPointerTransfer(const Value *V, bool ignore_GEP_indices);
const Value *skipPointerTransfer(const Value *V,
                                 std::set<const Value *> &processed);
const Value *
skipPointerTransfer(const Value *V, std::set<const Value *> &processed);
BasicBlock *getParentBlock(Value *v);
Function *getParentFunction(Value *v);
void dumpBlock(Value *v, char *blockName);
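The make_vector helper declared above is documented as taking a null-terminated argument list. A small usage sketch under that assumption; the wrapper function and its arguments are placeholders, not code from this header:

// Sketch: build an operand vector from a variadic, sentinel-terminated list,
// as described by the doc comment on make_vector.
void buildOperandList(llvm::Value *A, llvm::Value *B) {
  std::vector<llvm::Value *> Ops =
      llvm::make_vector<llvm::Value *>(A, B, (llvm::Value *)0);
  (void)Ops; // the trailing null pointer is the assumed terminator
}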
@ -18,8 +18,7 @@ using namespace llvm;

namespace llvm {

bool isParamLoad(const MachineInstr *MI)
{
bool isParamLoad(const MachineInstr *MI) {
  if ((MI->getOpcode() != NVPTX::LD_i32_avar) &&
      (MI->getOpcode() != NVPTX::LD_i64_avar))
    return false;
@ -30,13 +29,11 @@ bool isParamLoad(const MachineInstr *MI)
  return true;
}

#define DATA_MASK 0x7f
#define DIGIT_WIDTH 7
#define MORE_BYTES 0x80
#define DATA_MASK 0x7f
#define DIGIT_WIDTH 7
#define MORE_BYTES 0x80

static int encode_leb128(uint64_t val, int *nbytes,
                         char *space, int splen)
{
static int encode_leb128(uint64_t val, int *nbytes, char *space, int splen) {
  char *a;
  char *end = space + splen;

@ -61,29 +58,30 @@ static int encode_leb128(uint64_t val, int *nbytes,
#undef DIGIT_WIDTH
#undef MORE_BYTES

uint64_t encode_leb128(const char *str)
{
  union { uint64_t x; char a[8]; } temp64;
uint64_t encode_leb128(const char *str) {
  union {
    uint64_t x;
    char a[8];
  } temp64;

  temp64.x = 0;

  for (unsigned i=0,e=strlen(str); i!=e; ++i)
    temp64.a[i] = str[e-1-i];
  for (unsigned i = 0, e = strlen(str); i != e; ++i)
    temp64.a[i] = str[e - 1 - i];

  char encoded[16];
  int nbytes;

  int retval = encode_leb128(temp64.x, &nbytes, encoded, 16);

  (void)retval;
  assert(retval == 0 &&
         "Encoding to leb128 failed");
  (void) retval;
  assert(retval == 0 && "Encoding to leb128 failed");

  assert(nbytes <= 8 &&
         "Cannot support register names with leb128 encoding > 8 bytes");

  temp64.x = 0;
  for (int i=0; i<nbytes; ++i)
  for (int i = 0; i < nbytes; ++i)
    temp64.a[i] = encoded[i];

  return temp64.x;
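The DATA_MASK, DIGIT_WIDTH and MORE_BYTES macros above are the standard unsigned-LEB128 parameters: seven data bits per output byte, with the high bit set while more bytes follow. For reference, a self-contained sketch of that encoding; it is not the encode_leb128 body from this file:

// Sketch: generic unsigned LEB128 encoding, 7 bits per output byte.
static int encodeULEB128(uint64_t Val, unsigned char *Out, int OutLen) {
  int N = 0;
  do {
    if (N >= OutLen)
      return -1;                     // buffer too small
    unsigned char Byte = Val & 0x7f; // low 7 bits of the value
    Val >>= 7;
    if (Val != 0)
      Byte |= 0x80;                  // more bytes follow
    Out[N++] = Byte;
  } while (Val != 0);
  return N;                          // number of bytes written
}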
@ -17,7 +17,7 @@ Target llvm::TheNVPTXTarget64;

extern "C" void LLVMInitializeNVPTXTargetInfo() {
  RegisterTarget<Triple::nvptx> X(TheNVPTXTarget32, "nvptx",
                                  "NVIDIA PTX 32-bit");
                                  "NVIDIA PTX 32-bit");
  RegisterTarget<Triple::nvptx64> Y(TheNVPTXTarget64, "nvptx64",
                                    "NVIDIA PTX 64-bit");
                                    "NVIDIA PTX 64-bit");
}
@ -24,22 +24,21 @@ enum {
  CLK_LUMINANCE = 0x10B9

#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
  ,
  ,
  CLK_Rx = 0x10BA,
  CLK_RGx = 0x10BB,
  CLK_RGBx = 0x10BC
#endif
};


typedef enum clk_channel_type {
  // valid formats for float return types
  CLK_SNORM_INT8 = 0x10D0, // four channel RGBA unorm8
  CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
  CLK_UNORM_INT8 = 0x10D2, // four channel RGBA unorm8
  CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
  CLK_HALF_FLOAT = 0x10DD, // four channel RGBA half
  CLK_FLOAT = 0x10DE, // four channel RGBA float
  CLK_SNORM_INT8 = 0x10D0,  // four channel RGBA unorm8
  CLK_SNORM_INT16 = 0x10D1, // four channel RGBA unorm16
  CLK_UNORM_INT8 = 0x10D2,  // four channel RGBA unorm8
  CLK_UNORM_INT16 = 0x10D3, // four channel RGBA unorm16
  CLK_HALF_FLOAT = 0x10DD,  // four channel RGBA half
  CLK_FLOAT = 0x10DE,       // four channel RGBA float

#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
  CLK_UNORM_SHORT_565 = 0x10D4,
@ -48,7 +47,7 @@ typedef enum clk_channel_type {
#endif

  // valid only for integer return types
  CLK_SIGNED_INT8 = 0x10D7,
  CLK_SIGNED_INT8 = 0x10D7,
  CLK_SIGNED_INT16 = 0x10D8,
  CLK_SIGNED_INT32 = 0x10D9,
  CLK_UNSIGNED_INT8 = 0x10DA,
@ -56,70 +55,68 @@ typedef enum clk_channel_type {
  CLK_UNSIGNED_INT32 = 0x10DC,

  // CI SPI for CPU
  __CLK_UNORM_INT8888 , // four channel ARGB unorm8
  __CLK_UNORM_INT8888R, // four channel BGRA unorm8
  __CLK_UNORM_INT8888,  // four channel ARGB unorm8
  __CLK_UNORM_INT8888R, // four channel BGRA unorm8

  __CLK_VALID_IMAGE_TYPE_COUNT,
  __CLK_INVALID_IMAGE_TYPE = __CLK_VALID_IMAGE_TYPE_COUNT,
  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
                                        // represent any image type
  __CLK_VALID_IMAGE_TYPE_MASK = ( 1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS ) - 1
}clk_channel_type;
  __CLK_VALID_IMAGE_TYPE_MASK_BITS = 4, // number of bits required to
                                        // represent any image type
  __CLK_VALID_IMAGE_TYPE_MASK = (1 << __CLK_VALID_IMAGE_TYPE_MASK_BITS) - 1
} clk_channel_type;

typedef enum clk_sampler_type {
  __CLK_ADDRESS_BASE = 0,
  CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,
  __CLK_ADDRESS_BASE = 0,
  CLK_ADDRESS_NONE = 0 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_CLAMP = 1 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_CLAMP_TO_EDGE = 2 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_REPEAT = 3 << __CLK_ADDRESS_BASE,
  CLK_ADDRESS_MIRROR = 4 << __CLK_ADDRESS_BASE,

#if (__NV_CL_C_VERSION >= __NV_CL_C_VERSION_1_1)
  CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
  CLK_ADDRESS_MIRRORED_REPEAT = CLK_ADDRESS_MIRROR,
#endif
  __CLK_ADDRESS_MASK = CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP |
                       CLK_ADDRESS_CLAMP_TO_EDGE |
                       CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
  __CLK_ADDRESS_BITS = 3, // number of bits required to
                          // represent address info
  __CLK_ADDRESS_MASK =
      CLK_ADDRESS_NONE | CLK_ADDRESS_CLAMP | CLK_ADDRESS_CLAMP_TO_EDGE |
      CLK_ADDRESS_REPEAT | CLK_ADDRESS_MIRROR,
  __CLK_ADDRESS_BITS = 3, // number of bits required to
                          // represent address info

  __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
  CLK_NORMALIZED_COORDS_FALSE = 0,
  CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
  __CLK_NORMALIZED_MASK = CLK_NORMALIZED_COORDS_FALSE |
                          CLK_NORMALIZED_COORDS_TRUE,
  __CLK_NORMALIZED_BITS = 1, // number of bits required to
                             // represent normalization
  __CLK_NORMALIZED_BASE = __CLK_ADDRESS_BITS,
  CLK_NORMALIZED_COORDS_FALSE = 0,
  CLK_NORMALIZED_COORDS_TRUE = 1 << __CLK_NORMALIZED_BASE,
  __CLK_NORMALIZED_MASK =
      CLK_NORMALIZED_COORDS_FALSE | CLK_NORMALIZED_COORDS_TRUE,
  __CLK_NORMALIZED_BITS = 1, // number of bits required to
                             // represent normalization

  __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE +
                      __CLK_NORMALIZED_BITS,
  CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
  CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
  CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
  __CLK_FILTER_MASK = CLK_FILTER_NEAREST | CLK_FILTER_LINEAR |
                      CLK_FILTER_ANISOTROPIC,
  __CLK_FILTER_BITS = 2, // number of bits required to
                         // represent address info
  __CLK_FILTER_BASE = __CLK_NORMALIZED_BASE + __CLK_NORMALIZED_BITS,
  CLK_FILTER_NEAREST = 0 << __CLK_FILTER_BASE,
  CLK_FILTER_LINEAR = 1 << __CLK_FILTER_BASE,
  CLK_FILTER_ANISOTROPIC = 2 << __CLK_FILTER_BASE,
  __CLK_FILTER_MASK =
      CLK_FILTER_NEAREST | CLK_FILTER_LINEAR | CLK_FILTER_ANISOTROPIC,
  __CLK_FILTER_BITS = 2, // number of bits required to
                         // represent address info

  __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
  CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
  CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
  CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
  __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR |
                   CLK_MIP_ANISOTROPIC,
  __CLK_MIP_BITS = 2,
  __CLK_MIP_BASE = __CLK_FILTER_BASE + __CLK_FILTER_BITS,
  CLK_MIP_NEAREST = 0 << __CLK_MIP_BASE,
  CLK_MIP_LINEAR = 1 << __CLK_MIP_BASE,
  CLK_MIP_ANISOTROPIC = 2 << __CLK_MIP_BASE,
  __CLK_MIP_MASK = CLK_MIP_NEAREST | CLK_MIP_LINEAR | CLK_MIP_ANISOTROPIC,
  __CLK_MIP_BITS = 2,

  __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
  __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
                       __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,
  __CLK_SAMPLER_BITS = __CLK_MIP_BASE + __CLK_MIP_BITS,
  __CLK_SAMPLER_MASK = __CLK_MIP_MASK | __CLK_FILTER_MASK |
                       __CLK_NORMALIZED_MASK | __CLK_ADDRESS_MASK,

  __CLK_ANISOTROPIC_RATIO_BITS = 5,
  __CLK_ANISOTROPIC_RATIO_MASK = (int) 0x80000000 >>
                                 (__CLK_ANISOTROPIC_RATIO_BITS-1)
  __CLK_ANISOTROPIC_RATIO_BITS = 5,
  __CLK_ANISOTROPIC_RATIO_MASK =
      (int) 0x80000000 >> (__CLK_ANISOTROPIC_RATIO_BITS - 1)
} clk_sampler_type;
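The sampler word defined above is a plain bit-field: address mode in the low bits, then a normalized-coordinates flag, then the filter and mip modes. A small sketch that pulls the individual fields back out using the masks from the enum; the function itself is illustrative only:

// Sketch: decode a packed clk_sampler_type value with the masks above.
static void decodeSampler(unsigned Sampler) {
  unsigned AddrMode = Sampler & __CLK_ADDRESS_MASK;  // e.g. CLK_ADDRESS_CLAMP
  bool Normalized =
      (Sampler & __CLK_NORMALIZED_MASK) == CLK_NORMALIZED_COORDS_TRUE;
  unsigned Filter = Sampler & __CLK_FILTER_MASK;     // e.g. CLK_FILTER_LINEAR
  unsigned MipMode = Sampler & __CLK_MIP_MASK;
  (void)AddrMode; (void)Normalized; (void)Filter; (void)MipMode;
}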

// Memory synchronization
#define CLK_LOCAL_MEM_FENCE (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)
#define CLK_LOCAL_MEM_FENCE (1 << 0)
#define CLK_GLOBAL_MEM_FENCE (1 << 1)

#endif // __CL_COMMON_DEFINES_H__