mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-26 04:34:41 +00:00
[NVPTX] Re-enable support for virtual registers in the final output
Now that 3.3 is branched, we are re-enabling virtual registers to help iron out bugs before the next release. Some of the post-RA passes do not play well with virtual registers, so we disable them for now. The needed functionality of the PrologEpilogInserter pass is copied to a new backend-specific NVPTXPrologEpilog pass. The test for this commit is not breaking the existing tests. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@182998 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7ae921dbec
commit
5443e7d790
@ -24,6 +24,7 @@ set(NVPTXCodeGen_sources
|
||||
NVPTXUtilities.cpp
|
||||
NVVMReflect.cpp
|
||||
NVPTXGenericToNVVM.cpp
|
||||
NVPTXPrologEpilogPass.cpp
|
||||
)
|
||||
|
||||
add_llvm_target(NVPTXCodeGen ${NVPTXCodeGen_sources})
|
||||
|
@ -27,6 +27,7 @@
|
||||
namespace llvm {
|
||||
class NVPTXTargetMachine;
|
||||
class FunctionPass;
|
||||
class MachineFunctionPass;
|
||||
class formatted_raw_ostream;
|
||||
|
||||
namespace NVPTXCC {
|
||||
@ -66,6 +67,7 @@ FunctionPass *createNVPTXReMatBlockPass(NVPTXTargetMachine &);
|
||||
ModulePass *createGenericToNVVMPass();
|
||||
ModulePass *createNVVMReflectPass();
|
||||
ModulePass *createNVVMReflectPass(const StringMap<int>& Mapping);
|
||||
MachineFunctionPass *createNVPTXPrologEpilogPass();
|
||||
|
||||
bool isImageOrSamplerVal(const Value *, const Module *);
|
||||
|
||||
|
@ -436,9 +436,7 @@ void NVPTXAsmPrinter::EmitFunctionEntryLabel() {
|
||||
}
|
||||
|
||||
void NVPTXAsmPrinter::EmitFunctionBodyStart() {
|
||||
const TargetRegisterInfo &TRI = *TM.getRegisterInfo();
|
||||
unsigned numRegClasses = TRI.getNumRegClasses();
|
||||
VRidGlobal2LocalMap = new std::map<unsigned, unsigned>[numRegClasses + 1];
|
||||
VRegMapping.clear();
|
||||
OutStreamer.EmitRawText(StringRef("{\n"));
|
||||
setAndEmitFunctionVirtualRegisters(*MF);
|
||||
|
||||
@ -450,7 +448,7 @@ void NVPTXAsmPrinter::EmitFunctionBodyStart() {
|
||||
|
||||
void NVPTXAsmPrinter::EmitFunctionBodyEnd() {
|
||||
OutStreamer.EmitRawText(StringRef("}\n"));
|
||||
delete[] VRidGlobal2LocalMap;
|
||||
VRegMapping.clear();
|
||||
}
|
||||
|
||||
void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
|
||||
@ -507,9 +505,8 @@ void NVPTXAsmPrinter::emitKernelFunctionDirectives(const Function &F,
|
||||
void NVPTXAsmPrinter::getVirtualRegisterName(unsigned vr, bool isVec,
|
||||
raw_ostream &O) {
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(vr);
|
||||
unsigned id = RC->getID();
|
||||
|
||||
std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[id];
|
||||
DenseMap<unsigned, unsigned> ®map = VRegMapping[RC];
|
||||
unsigned mapped_vr = regmap[vr];
|
||||
|
||||
if (!isVec) {
|
||||
@ -1709,48 +1706,36 @@ void NVPTXAsmPrinter::setAndEmitFunctionVirtualRegisters(
|
||||
for (unsigned i = 0; i < numVRs; i++) {
|
||||
unsigned int vr = TRI->index2VirtReg(i);
|
||||
const TargetRegisterClass *RC = MRI->getRegClass(vr);
|
||||
std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[RC->getID()];
|
||||
DenseMap<unsigned, unsigned> ®map = VRegMapping[RC];
|
||||
int n = regmap.size();
|
||||
regmap.insert(std::make_pair(vr, n + 1));
|
||||
}
|
||||
|
||||
// Emit register declarations
|
||||
// @TODO: Extract out the real register usage
|
||||
O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
|
||||
O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
|
||||
O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
|
||||
O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
|
||||
O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
|
||||
O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
|
||||
O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .pred %p<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .s16 %rc<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .s16 %rs<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .s32 %r<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .s64 %rl<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .f32 %f<" << NVPTXNumRegisters << ">;\n";
|
||||
// O << "\t.reg .f64 %fl<" << NVPTXNumRegisters << ">;\n";
|
||||
|
||||
// Emit declaration of the virtual registers or 'physical' registers for
|
||||
// each register class
|
||||
//for (unsigned i=0; i< numRegClasses; i++) {
|
||||
// std::map<unsigned, unsigned> ®map = VRidGlobal2LocalMap[i];
|
||||
// const TargetRegisterClass *RC = TRI->getRegClass(i);
|
||||
// std::string rcname = getNVPTXRegClassName(RC);
|
||||
// std::string rcStr = getNVPTXRegClassStr(RC);
|
||||
// //int n = regmap.size();
|
||||
// if (!isNVPTXVectorRegClass(RC)) {
|
||||
// O << "\t.reg " << rcname << " \t" << rcStr << "<"
|
||||
// << NVPTXNumRegisters << ">;\n";
|
||||
// }
|
||||
for (unsigned i=0; i< TRI->getNumRegClasses(); i++) {
|
||||
const TargetRegisterClass *RC = TRI->getRegClass(i);
|
||||
DenseMap<unsigned, unsigned> ®map = VRegMapping[RC];
|
||||
std::string rcname = getNVPTXRegClassName(RC);
|
||||
std::string rcStr = getNVPTXRegClassStr(RC);
|
||||
int n = regmap.size();
|
||||
|
||||
// Only declare those registers that may be used. And do not emit vector
|
||||
// registers as
|
||||
// they are all elementized to scalar registers.
|
||||
//if (n && !isNVPTXVectorRegClass(RC)) {
|
||||
// if (RegAllocNilUsed) {
|
||||
// O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
|
||||
// << ">;\n";
|
||||
// }
|
||||
// else {
|
||||
// O << "\t.reg " << rcname << " \t" << StrToUpper(rcStr)
|
||||
// << "<" << 32 << ">;\n";
|
||||
// }
|
||||
//}
|
||||
//}
|
||||
// Only declare those registers that may be used.
|
||||
if (n) {
|
||||
O << "\t.reg " << rcname << " \t" << rcStr << "<" << (n+1)
|
||||
<< ">;\n";
|
||||
}
|
||||
}
|
||||
|
||||
OutStreamer.EmitRawText(O.str());
|
||||
}
|
||||
|
@ -243,7 +243,9 @@ private:
|
||||
// The contents are specific for each
|
||||
// MachineFunction. But the size of the
|
||||
// array is not.
|
||||
std::map<unsigned, unsigned> *VRidGlobal2LocalMap;
|
||||
typedef DenseMap<unsigned, unsigned> VRegMap;
|
||||
typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap;
|
||||
VRegRCMap VRegMapping;
|
||||
// cache the subtarget here.
|
||||
const NVPTXSubtarget &nvptxSubtarget;
|
||||
// Build the map between type name and ID based on module's type
|
||||
@ -281,7 +283,6 @@ public:
|
||||
: AsmPrinter(TM, Streamer),
|
||||
nvptxSubtarget(TM.getSubtarget<NVPTXSubtarget>()) {
|
||||
CurrentBankselLabelInBasicBlock = "";
|
||||
VRidGlobal2LocalMap = NULL;
|
||||
reader = NULL;
|
||||
}
|
||||
|
||||
|
@ -32,36 +32,36 @@ NVPTXInstrInfo::NVPTXInstrInfo(NVPTXTargetMachine &tm)
|
||||
void NVPTXInstrInfo::copyPhysReg(
|
||||
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL,
|
||||
unsigned DestReg, unsigned SrcReg, bool KillSrc) const {
|
||||
if (NVPTX::Int32RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int32RegsRegClass.contains(SrcReg))
|
||||
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
|
||||
const TargetRegisterClass *DestRC = MRI.getRegClass(DestReg);
|
||||
const TargetRegisterClass *SrcRC = MRI.getRegClass(SrcReg);
|
||||
|
||||
if (DestRC != SrcRC)
|
||||
report_fatal_error("Attempted to created cross-class register copy");
|
||||
|
||||
if (DestRC == &NVPTX::Int32RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV32rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int8RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int8RegsRegClass.contains(SrcReg))
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int1RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int1RegsRegClass.contains(SrcReg))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (DestRC == &NVPTX::Int1RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV1rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Float32RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Float32RegsRegClass.contains(SrcReg))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (DestRC == &NVPTX::Float32RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::FMOV32rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int16RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int16RegsRegClass.contains(SrcReg))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (DestRC == &NVPTX::Int16RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV16rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Int64RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Int64RegsRegClass.contains(SrcReg))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (DestRC == &NVPTX::Int8RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV8rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (DestRC == &NVPTX::Int64RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::IMOV64rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (NVPTX::Float64RegsRegClass.contains(DestReg) &&
|
||||
NVPTX::Float64RegsRegClass.contains(SrcReg))
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else if (DestRC == &NVPTX::Float64RegsRegClass)
|
||||
BuildMI(MBB, I, DL, get(NVPTX::FMOV64rr), DestReg)
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
.addReg(SrcReg, getKillRegState(KillSrc));
|
||||
else {
|
||||
llvm_unreachable("Don't know how to copy a register");
|
||||
llvm_unreachable("Bad register copy");
|
||||
}
|
||||
}
|
||||
|
||||
|
225
lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
Normal file
225
lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
Normal file
@ -0,0 +1,225 @@
|
||||
//===-- NVPTXPrologEpilogPass.cpp - NVPTX prolog/epilog inserter ----------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file is a copy of the generic LLVM PrologEpilogInserter pass, modified
|
||||
// to remove unneeded functionality and to handle virtual registers. Most code
|
||||
// here is a copy of PrologEpilogInserter.cpp.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "NVPTX.h"
|
||||
#include "llvm/Pass.h"
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineFunctionPass.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
namespace {
|
||||
class NVPTXPrologEpilogPass : public MachineFunctionPass {
|
||||
public:
|
||||
static char ID;
|
||||
NVPTXPrologEpilogPass() : MachineFunctionPass(ID) {}
|
||||
|
||||
virtual bool runOnMachineFunction(MachineFunction &MF);
|
||||
|
||||
private:
|
||||
void calculateFrameObjectOffsets(MachineFunction &Fn);
|
||||
};
|
||||
}
|
||||
|
||||
MachineFunctionPass *llvm::createNVPTXPrologEpilogPass() {
|
||||
return new NVPTXPrologEpilogPass();
|
||||
}
|
||||
|
||||
char NVPTXPrologEpilogPass::ID = 0;
|
||||
|
||||
/// Lay out the stack frame of \p MF, replace every frame-index operand with
/// its resolved form, and let the target's frame lowering insert the function
/// prologue and epilogue(s).
///
/// \returns true unconditionally. calculateFrameObjectOffsets() always writes
/// to the function's frame info (it finishes by calling setStackSize), and
/// emitPrologue()/emitEpilogue() may insert instructions, so the function
/// must be reported as modified even when no frame-index operand was found.
bool NVPTXPrologEpilogPass::runOnMachineFunction(MachineFunction &MF) {
  const TargetMachine &TM = MF.getTarget();
  const TargetFrameLowering &TFI = *TM.getFrameLowering();
  const TargetRegisterInfo &TRI = *TM.getRegisterInfo();

  // Offsets must be final before any frame-index operand is rewritten.
  calculateFrameObjectOffsets(MF);

  for (MachineFunction::iterator BB = MF.begin(), E = MF.end(); BB != E; ++BB) {
    // BB->end() is deliberately re-read on every iteration instead of being
    // cached in a local.
    for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) {
      MachineInstr *MI = I;
      for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
        // Only frame-index operands need to be eliminated.
        if (!MI->getOperand(i).isFI())
          continue;
        TRI.eliminateFrameIndex(MI, 0, i, NULL);
      }
    }
  }

  // Add function prolog/epilog
  TFI.emitPrologue(MF);

  for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    // If last instruction is a return instruction, add an epilogue
    if (!I->empty() && I->back().isReturn())
      TFI.emitEpilogue(MF, *I);
  }

  return true;
}
|
||||
|
||||
/// AdjustStackOffset - Helper function used to adjust the stack frame offset.
|
||||
static inline void
|
||||
AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
|
||||
bool StackGrowsDown, int64_t &Offset,
|
||||
unsigned &MaxAlign) {
|
||||
// If the stack grows down, add the object size to find the lowest address.
|
||||
if (StackGrowsDown)
|
||||
Offset += MFI->getObjectSize(FrameIdx);
|
||||
|
||||
unsigned Align = MFI->getObjectAlignment(FrameIdx);
|
||||
|
||||
// If the alignment of this object is greater than that of the stack, then
|
||||
// increase the stack alignment to match.
|
||||
MaxAlign = std::max(MaxAlign, Align);
|
||||
|
||||
// Adjust to alignment boundary.
|
||||
Offset = (Offset + Align - 1) / Align * Align;
|
||||
|
||||
if (StackGrowsDown) {
|
||||
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << -Offset << "]\n");
|
||||
MFI->setObjectOffset(FrameIdx, -Offset); // Set the computed offset
|
||||
} else {
|
||||
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") at SP[" << Offset << "]\n");
|
||||
MFI->setObjectOffset(FrameIdx, Offset);
|
||||
Offset += MFI->getObjectSize(FrameIdx);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
NVPTXPrologEpilogPass::calculateFrameObjectOffsets(MachineFunction &Fn) {
|
||||
const TargetFrameLowering &TFI = *Fn.getTarget().getFrameLowering();
|
||||
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
|
||||
|
||||
bool StackGrowsDown =
|
||||
TFI.getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown;
|
||||
|
||||
// Loop over all of the stack objects, assigning sequential addresses...
|
||||
MachineFrameInfo *MFI = Fn.getFrameInfo();
|
||||
|
||||
// Start at the beginning of the local area.
|
||||
// The Offset is the distance from the stack top in the direction
|
||||
// of stack growth -- so it's always nonnegative.
|
||||
int LocalAreaOffset = TFI.getOffsetOfLocalArea();
|
||||
if (StackGrowsDown)
|
||||
LocalAreaOffset = -LocalAreaOffset;
|
||||
assert(LocalAreaOffset >= 0
|
||||
&& "Local area offset should be in direction of stack growth");
|
||||
int64_t Offset = LocalAreaOffset;
|
||||
|
||||
// If there are fixed sized objects that are preallocated in the local area,
|
||||
// non-fixed objects can't be allocated right at the start of local area.
|
||||
// We currently don't support filling in holes in between fixed sized
|
||||
// objects, so we adjust 'Offset' to point to the end of last fixed sized
|
||||
// preallocated object.
|
||||
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
|
||||
int64_t FixedOff;
|
||||
if (StackGrowsDown) {
|
||||
// The maximum distance from the stack pointer is at lower address of
|
||||
// the object -- which is given by offset. For down growing stack
|
||||
// the offset is negative, so we negate the offset to get the distance.
|
||||
FixedOff = -MFI->getObjectOffset(i);
|
||||
} else {
|
||||
// The maximum distance from the start pointer is at the upper
|
||||
// address of the object.
|
||||
FixedOff = MFI->getObjectOffset(i) + MFI->getObjectSize(i);
|
||||
}
|
||||
if (FixedOff > Offset) Offset = FixedOff;
|
||||
}
|
||||
|
||||
// NOTE: We do not have a call stack
|
||||
|
||||
unsigned MaxAlign = MFI->getMaxAlignment();
|
||||
|
||||
// No scavenger
|
||||
|
||||
// FIXME: Once this is working, then enable flag will change to a target
|
||||
// check for whether the frame is large enough to want to use virtual
|
||||
// frame index registers. Functions which don't want/need this optimization
|
||||
// will continue to use the existing code path.
|
||||
if (MFI->getUseLocalStackAllocationBlock()) {
|
||||
unsigned Align = MFI->getLocalFrameMaxAlign();
|
||||
|
||||
// Adjust to alignment boundary.
|
||||
Offset = (Offset + Align - 1) / Align * Align;
|
||||
|
||||
DEBUG(dbgs() << "Local frame base offset: " << Offset << "\n");
|
||||
|
||||
// Resolve offsets for objects in the local block.
|
||||
for (unsigned i = 0, e = MFI->getLocalFrameObjectCount(); i != e; ++i) {
|
||||
std::pair<int, int64_t> Entry = MFI->getLocalFrameObjectMap(i);
|
||||
int64_t FIOffset = (StackGrowsDown ? -Offset : Offset) + Entry.second;
|
||||
DEBUG(dbgs() << "alloc FI(" << Entry.first << ") at SP[" <<
|
||||
FIOffset << "]\n");
|
||||
MFI->setObjectOffset(Entry.first, FIOffset);
|
||||
}
|
||||
// Allocate the local block
|
||||
Offset += MFI->getLocalFrameSize();
|
||||
|
||||
MaxAlign = std::max(Align, MaxAlign);
|
||||
}
|
||||
|
||||
// No stack protector
|
||||
|
||||
// Then assign frame offsets to stack objects that are not used to spill
|
||||
// callee saved registers.
|
||||
for (unsigned i = 0, e = MFI->getObjectIndexEnd(); i != e; ++i) {
|
||||
if (MFI->isObjectPreAllocated(i) &&
|
||||
MFI->getUseLocalStackAllocationBlock())
|
||||
continue;
|
||||
if (MFI->isDeadObjectIndex(i))
|
||||
continue;
|
||||
|
||||
AdjustStackOffset(MFI, i, StackGrowsDown, Offset, MaxAlign);
|
||||
}
|
||||
|
||||
// No scavenger
|
||||
|
||||
if (!TFI.targetHandlesStackFrameRounding()) {
|
||||
// If we have reserved argument space for call sites in the function
|
||||
// immediately on entry to the current function, count it as part of the
|
||||
// overall stack size.
|
||||
if (MFI->adjustsStack() && TFI.hasReservedCallFrame(Fn))
|
||||
Offset += MFI->getMaxCallFrameSize();
|
||||
|
||||
// Round up the size to a multiple of the alignment. If the function has
|
||||
// any calls or alloca's, align to the target's StackAlignment value to
|
||||
// ensure that the callee's frame or the alloca data is suitably aligned;
|
||||
// otherwise, for leaf functions, align to the TransientStackAlignment
|
||||
// value.
|
||||
unsigned StackAlign;
|
||||
if (MFI->adjustsStack() || MFI->hasVarSizedObjects() ||
|
||||
(RegInfo->needsStackRealignment(Fn) && MFI->getObjectIndexEnd() != 0))
|
||||
StackAlign = TFI.getStackAlignment();
|
||||
else
|
||||
StackAlign = TFI.getTransientStackAlignment();
|
||||
|
||||
// If the frame pointer is eliminated, all frame offsets will be relative to
|
||||
// SP not FP. Align to MaxAlign so this works.
|
||||
StackAlign = std::max(StackAlign, MaxAlign);
|
||||
unsigned AlignMask = StackAlign - 1;
|
||||
Offset = (Offset + AlignMask) & ~uint64_t(AlignMask);
|
||||
}
|
||||
|
||||
// Update frame info to pretend that this is part of the stack...
|
||||
int64_t StackSize = Offset - LocalAreaOffset;
|
||||
MFI->setStackSize(StackSize);
|
||||
}
|
@ -57,9 +57,9 @@ std::string getNVPTXRegClassStr(TargetRegisterClass const *RC) {
|
||||
return "%f";
|
||||
}
|
||||
if (RC == &NVPTX::Float64RegsRegClass) {
|
||||
return "%fd";
|
||||
return "%fl";
|
||||
} else if (RC == &NVPTX::Int64RegsRegClass) {
|
||||
return "%rd";
|
||||
return "%rl";
|
||||
} else if (RC == &NVPTX::Int32RegsRegClass) {
|
||||
return "%r";
|
||||
} else if (RC == &NVPTX::Int16RegsRegClass) {
|
||||
|
@ -107,6 +107,10 @@ public:
|
||||
virtual void addIRPasses();
|
||||
virtual bool addInstSelector();
|
||||
virtual bool addPreRegAlloc();
|
||||
virtual bool addPostRegAlloc();
|
||||
|
||||
virtual void addFastRegAlloc(FunctionPass *RegAllocPass);
|
||||
virtual void addOptimizedRegAlloc(FunctionPass *RegAllocPass);
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
@ -116,6 +120,15 @@ TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
|
||||
}
|
||||
|
||||
/// Configure the IR-level pipeline for NVPTX: switch off the post-RA passes
/// that cannot cope with virtual registers, run the default IR passes, then
/// append the generic-to-NVVM conversion.
void NVPTXPassConfig::addIRPasses() {
  // On NVPTX every register is still virtual after "register allocation",
  // and the passes below are known to misbehave in that situation, so they
  // are disabled explicitly. The parts of PrologEpilogCodeInserter that are
  // still required are provided by the NVPTXPrologEpilog pass instead (see
  // NVPTXPrologEpilogPass.cpp).
  disablePass(&PrologEpilogCodeInserterID);
  disablePass(&MachineCopyPropagationID);
  disablePass(&BranchFolderPassID);

  // Default IR passes first, then the NVPTX-specific addition.
  TargetPassConfig::addIRPasses();
  addPass(createGenericToNVVMPass());
}
|
||||
@ -129,3 +142,17 @@ bool NVPTXPassConfig::addInstSelector() {
|
||||
}
|
||||
|
||||
/// No NVPTX-specific passes are scheduled before register allocation.
bool NVPTXPassConfig::addPreRegAlloc() {
  return false;
}
|
||||
bool NVPTXPassConfig::addPostRegAlloc() {
|
||||
addPass(createNVPTXPrologEpilogPass());
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Fast register-allocation stage. NVPTX performs no register allocation:
/// the supplied \p RegAllocPass is not added to the pipeline and only
/// StrongPHIElimination is scheduled.
void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
  (void)RegAllocPass; // intentionally unused: no reg alloc
  addPass(&StrongPHIEliminationID);
}
|
||||
|
||||
/// Optimized register-allocation stage. NVPTX performs no register
/// allocation: the supplied \p RegAllocPass is not added to the pipeline and
/// only StrongPHIElimination is scheduled.
void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
  (void)RegAllocPass; // intentionally unused: no reg alloc
  addPass(&StrongPHIEliminationID);
}
|
||||
|
@ -2,231 +2,231 @@
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
define ptx_device i32 @test_tid_x() {
|
||||
; CHECK: mov.u32 %r0, %tid.x;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_y() {
|
||||
; CHECK: mov.u32 %r0, %tid.y;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_z() {
|
||||
; CHECK: mov.u32 %r0, %tid.z;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_tid_w() {
|
||||
; CHECK: mov.u32 %r0, %tid.w;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %tid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.tid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_x() {
|
||||
; CHECK: mov.u32 %r0, %ntid.x;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_y() {
|
||||
; CHECK: mov.u32 %r0, %ntid.y;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_z() {
|
||||
; CHECK: mov.u32 %r0, %ntid.z;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ntid_w() {
|
||||
; CHECK: mov.u32 %r0, %ntid.w;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ntid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ntid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_laneid() {
|
||||
; CHECK: mov.u32 %r0, %laneid;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %laneid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.laneid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_warpid() {
|
||||
; CHECK: mov.u32 %r0, %warpid;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %warpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.warpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nwarpid() {
|
||||
; CHECK: mov.u32 %r0, %nwarpid;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nwarpid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nwarpid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_x() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.x;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_y() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.y;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_z() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.z;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_ctaid_w() {
|
||||
; CHECK: mov.u32 %r0, %ctaid.w;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %ctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.ctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_x() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.x;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.x;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.x()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_y() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.y;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.y;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.y()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_z() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.z;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.z;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.z()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nctaid_w() {
|
||||
; CHECK: mov.u32 %r0, %nctaid.w;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nctaid.w;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nctaid.w()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_smid() {
|
||||
; CHECK: mov.u32 %r0, %smid;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %smid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.smid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_nsmid() {
|
||||
; CHECK: mov.u32 %r0, %nsmid;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %nsmid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.nsmid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_gridid() {
|
||||
; CHECK: mov.u32 %r0, %gridid;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %gridid;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.gridid()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_eq() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_eq;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_eq;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.eq()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_le() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_le;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_le;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.le()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_lt() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_lt;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_lt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.lt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_ge() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_ge;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_ge;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.ge()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_lanemask_gt() {
|
||||
; CHECK: mov.u32 %r0, %lanemask_gt;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %lanemask_gt;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.lanemask.gt()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_clock() {
|
||||
; CHECK: mov.u32 %r0, %clock;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %clock;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.clock()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i64 @test_clock64() {
|
||||
; CHECK: mov.u64 %rl0, %clock64;
|
||||
; CHECK: mov.u64 %rl{{[0-9]+}}, %clock64;
|
||||
; CHECK: ret;
|
||||
%x = call i64 @llvm.ptx.read.clock64()
|
||||
ret i64 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm0() {
|
||||
; CHECK: mov.u32 %r0, %pm0;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm0;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm0()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm1() {
|
||||
; CHECK: mov.u32 %r0, %pm1;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm1;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm1()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm2() {
|
||||
; CHECK: mov.u32 %r0, %pm2;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm2;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm2()
|
||||
ret i32 %x
|
||||
}
|
||||
|
||||
define ptx_device i32 @test_pm3() {
|
||||
; CHECK: mov.u32 %r0, %pm3;
|
||||
; CHECK: mov.u32 %r{{[0-9]+}}, %pm3;
|
||||
; CHECK: ret;
|
||||
%x = call i32 @llvm.ptx.read.pm3()
|
||||
ret i32 %x
|
||||
|
@ -2,14 +2,14 @@
|
||||
; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 | FileCheck %s
|
||||
|
||||
define ptx_device float @test_fabsf(float %f) {
|
||||
; CHECK: abs.f32 %f0, %f0;
|
||||
; CHECK: abs.f32 %f{{[0-9]+}}, %f{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%x = call float @llvm.fabs.f32(float %f)
|
||||
ret float %x
|
||||
}
|
||||
|
||||
define ptx_device double @test_fabs(double %d) {
|
||||
; CHECK: abs.f64 %fl0, %fl0;
|
||||
; CHECK: abs.f64 %fl{{[0-9]+}}, %fl{{[0-9]+}};
|
||||
; CHECK: ret;
|
||||
%x = call double @llvm.fabs.f64(double %d)
|
||||
ret double %x
|
||||
|
Loading…
x
Reference in New Issue
Block a user