mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2025-01-23 20:34:58 +00:00
Fix PR3241: Currently EmitCopyFromReg emits a copy from the physical register to a virtual register unless it requires an expensive cross class copy. That means we are only treating "expensive to copy" register dependency as physical register dependency.
Also future proof the scheduler to handle "normal" physical register dependencies. The code is not exercised yet. llvm-svn: 62074
This commit is contained in:
parent
e6451daef4
commit
5e17ea36e1
@ -485,7 +485,7 @@ namespace llvm {
|
||||
protected:
|
||||
void AddMemOperand(MachineInstr *MI, const MachineMemOperand &MO);
|
||||
|
||||
void EmitCrossRCCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
|
||||
void EmitPhysRegCopy(SUnit *SU, DenseMap<SUnit*, unsigned> &VRBaseMap);
|
||||
|
||||
/// ForceUnitLatencies - Return true if all scheduling edges should be given a
|
||||
/// latency value of one. The default is to return false; schedulers may
|
||||
|
@ -36,7 +36,7 @@ void ScheduleDAG::EmitNoop() {
|
||||
TII->insertNoop(*BB, BB->end());
|
||||
}
|
||||
|
||||
void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
|
||||
void ScheduleDAG::EmitPhysRegCopy(SUnit *SU,
|
||||
DenseMap<SUnit*, unsigned> &VRBaseMap) {
|
||||
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
|
||||
I != E; ++I) {
|
||||
@ -49,12 +49,11 @@ void ScheduleDAG::EmitCrossRCCopy(SUnit *SU,
|
||||
unsigned Reg = 0;
|
||||
for (SUnit::const_succ_iterator II = SU->Succs.begin(),
|
||||
EE = SU->Succs.end(); II != EE; ++II) {
|
||||
if (I->getReg()) {
|
||||
Reg = I->getReg();
|
||||
if (II->getReg()) {
|
||||
Reg = II->getReg();
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(I->getReg() && "Unknown physical register!");
|
||||
TII->copyRegToReg(*BB, BB->end(), Reg, VRI->second,
|
||||
SU->CopyDstRC, SU->CopySrcRC);
|
||||
} else {
|
||||
|
@ -28,7 +28,7 @@ using namespace llvm;
|
||||
|
||||
STATISTIC(NumUnfolds, "Number of nodes unfolded");
|
||||
STATISTIC(NumDups, "Number of duplicated nodes");
|
||||
STATISTIC(NumCCCopies, "Number of cross class copies");
|
||||
STATISTIC(NumPRCopies, "Number of physical copies");
|
||||
|
||||
static RegisterScheduler
|
||||
fastDAGScheduler("fast", "Fast suboptimal list scheduling",
|
||||
@ -93,10 +93,10 @@ private:
|
||||
void ReleasePred(SUnit *SU, SDep *PredEdge);
|
||||
void ScheduleNodeBottomUp(SUnit*, unsigned);
|
||||
SUnit *CopyAndMoveSuccessors(SUnit*);
|
||||
void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
|
||||
const TargetRegisterClass*,
|
||||
const TargetRegisterClass*,
|
||||
SmallVector<SUnit*, 2>&);
|
||||
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
|
||||
const TargetRegisterClass*,
|
||||
const TargetRegisterClass*,
|
||||
SmallVector<SUnit*, 2>&);
|
||||
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
|
||||
void ListScheduleBottomUp();
|
||||
|
||||
@ -361,17 +361,16 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) {
|
||||
DelDeps.push_back(std::make_pair(SuccSU, D));
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
|
||||
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
|
||||
RemovePred(DelDeps[i].first, DelDeps[i].second);
|
||||
}
|
||||
|
||||
++NumDups;
|
||||
return NewSU;
|
||||
}
|
||||
|
||||
/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
|
||||
/// and move all scheduled successors of the given SUnit to the last copy.
|
||||
void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
|
||||
/// scheduled successors of the given SUnit to the last copy.
|
||||
void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
const TargetRegisterClass *DestRC,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
SmallVector<SUnit*, 2> &Copies) {
|
||||
@ -408,7 +407,7 @@ void ScheduleDAGFast::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
Copies.push_back(CopyFromSU);
|
||||
Copies.push_back(CopyToSU);
|
||||
|
||||
++NumCCCopies;
|
||||
++NumPRCopies;
|
||||
}
|
||||
|
||||
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
|
||||
@ -524,19 +523,22 @@ void ScheduleDAGFast::ListScheduleBottomUp() {
|
||||
assert(LRegs.size() == 1 && "Can't handle this yet!");
|
||||
unsigned Reg = LRegs[0];
|
||||
SUnit *LRDef = LiveRegDefs[Reg];
|
||||
SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
|
||||
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
|
||||
const TargetRegisterClass *RC =
|
||||
TRI->getPhysicalRegisterRegClass(Reg, VT);
|
||||
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
|
||||
|
||||
// If cross copy register class is null, then it must be possible copy
|
||||
// the value directly. Do not try duplicate the def.
|
||||
SUnit *NewDef = 0;
|
||||
if (DestRC)
|
||||
NewDef = CopyAndMoveSuccessors(LRDef);
|
||||
else
|
||||
DestRC = RC;
|
||||
if (!NewDef) {
|
||||
// Issue expensive cross register class copies.
|
||||
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
|
||||
const TargetRegisterClass *RC =
|
||||
TRI->getPhysicalRegisterRegClass(Reg, VT);
|
||||
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
|
||||
if (!DestRC) {
|
||||
assert(false && "Don't know how to copy this physical register!");
|
||||
abort();
|
||||
}
|
||||
// Issue copies, these can be expensive cross register class copies.
|
||||
SmallVector<SUnit*, 2> Copies;
|
||||
InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
|
||||
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
|
||||
DOUT << "Adding an edge from SU # " << TrySU->NodeNum
|
||||
<< " to SU #" << Copies.front()->NodeNum << "\n";
|
||||
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
|
||||
|
@ -35,7 +35,7 @@ using namespace llvm;
|
||||
STATISTIC(NumBacktracks, "Number of times scheduler backtracked");
|
||||
STATISTIC(NumUnfolds, "Number of nodes unfolded");
|
||||
STATISTIC(NumDups, "Number of duplicated nodes");
|
||||
STATISTIC(NumCCCopies, "Number of cross class copies");
|
||||
STATISTIC(NumPRCopies, "Number of physical register copies");
|
||||
|
||||
static RegisterScheduler
|
||||
burrListDAGScheduler("list-burr",
|
||||
@ -121,10 +121,10 @@ private:
|
||||
void UnscheduleNodeBottomUp(SUnit*);
|
||||
void BacktrackBottomUp(SUnit*, unsigned, unsigned&);
|
||||
SUnit *CopyAndMoveSuccessors(SUnit*);
|
||||
void InsertCCCopiesAndMoveSuccs(SUnit*, unsigned,
|
||||
const TargetRegisterClass*,
|
||||
const TargetRegisterClass*,
|
||||
SmallVector<SUnit*, 2>&);
|
||||
void InsertCopiesAndMoveSuccs(SUnit*, unsigned,
|
||||
const TargetRegisterClass*,
|
||||
const TargetRegisterClass*,
|
||||
SmallVector<SUnit*, 2>&);
|
||||
bool DelayForLiveRegsBottomUp(SUnit*, SmallVector<unsigned, 4>&);
|
||||
void ListScheduleTopDown();
|
||||
void ListScheduleBottomUp();
|
||||
@ -517,11 +517,11 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) {
|
||||
return NewSU;
|
||||
}
|
||||
|
||||
/// InsertCCCopiesAndMoveSuccs - Insert expensive cross register class copies
|
||||
/// and move all scheduled successors of the given SUnit to the last copy.
|
||||
void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
const TargetRegisterClass *DestRC,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
/// InsertCopiesAndMoveSuccs - Insert register copies and move all
|
||||
/// scheduled successors of the given SUnit to the last copy.
|
||||
void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
const TargetRegisterClass *DestRC,
|
||||
const TargetRegisterClass *SrcRC,
|
||||
SmallVector<SUnit*, 2> &Copies) {
|
||||
SUnit *CopyFromSU = CreateNewSUnit(NULL);
|
||||
CopyFromSU->CopySrcRC = SrcRC;
|
||||
@ -546,9 +546,8 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
DelDeps.push_back(std::make_pair(SuccSU, *I));
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i) {
|
||||
for (unsigned i = 0, e = DelDeps.size(); i != e; ++i)
|
||||
RemovePred(DelDeps[i].first, DelDeps[i].second);
|
||||
}
|
||||
|
||||
AddPred(CopyFromSU, SDep(SU, SDep::Data, SU->Latency, Reg));
|
||||
AddPred(CopyToSU, SDep(CopyFromSU, SDep::Data, CopyFromSU->Latency, 0));
|
||||
@ -559,7 +558,7 @@ void ScheduleDAGRRList::InsertCCCopiesAndMoveSuccs(SUnit *SU, unsigned Reg,
|
||||
Copies.push_back(CopyFromSU);
|
||||
Copies.push_back(CopyToSU);
|
||||
|
||||
++NumCCCopies;
|
||||
++NumPRCopies;
|
||||
}
|
||||
|
||||
/// getPhysicalRegisterVT - Returns the ValueType of the physical register
|
||||
@ -705,27 +704,32 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
|
||||
}
|
||||
|
||||
if (!CurSU) {
|
||||
// Can't backtrack. Try duplicating the nodes that produces these
|
||||
// "expensive to copy" values to break the dependency. In case even
|
||||
// that doesn't work, insert cross class copies.
|
||||
// Can't backtrack. If it's too expensive to copy the value, then try
|
||||
// duplicate the nodes that produces these "too expensive to copy"
|
||||
// values to break the dependency. In case even that doesn't work,
|
||||
// insert cross class copies.
|
||||
// If it's not too expensive, i.e. cost != -1, issue copies.
|
||||
SUnit *TrySU = NotReady[0];
|
||||
SmallVector<unsigned, 4> &LRegs = LRegsMap[TrySU];
|
||||
assert(LRegs.size() == 1 && "Can't handle this yet!");
|
||||
unsigned Reg = LRegs[0];
|
||||
SUnit *LRDef = LiveRegDefs[Reg];
|
||||
SUnit *NewDef = CopyAndMoveSuccessors(LRDef);
|
||||
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
|
||||
const TargetRegisterClass *RC =
|
||||
TRI->getPhysicalRegisterRegClass(Reg, VT);
|
||||
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
|
||||
|
||||
// If cross copy register class is null, then it must be possible copy
|
||||
// the value directly. Do not try duplicate the def.
|
||||
SUnit *NewDef = 0;
|
||||
if (DestRC)
|
||||
NewDef = CopyAndMoveSuccessors(LRDef);
|
||||
else
|
||||
DestRC = RC;
|
||||
if (!NewDef) {
|
||||
// Issue expensive cross register class copies.
|
||||
MVT VT = getPhysicalRegisterVT(LRDef->getNode(), Reg, TII);
|
||||
const TargetRegisterClass *RC =
|
||||
TRI->getPhysicalRegisterRegClass(Reg, VT);
|
||||
const TargetRegisterClass *DestRC = TRI->getCrossCopyRegClass(RC);
|
||||
if (!DestRC) {
|
||||
assert(false && "Don't know how to copy this physical register!");
|
||||
abort();
|
||||
}
|
||||
// Issue copies, these can be expensive cross register class copies.
|
||||
SmallVector<SUnit*, 2> Copies;
|
||||
InsertCCCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
|
||||
InsertCopiesAndMoveSuccs(LRDef, Reg, DestRC, RC, Copies);
|
||||
DOUT << "Adding an edge from SU #" << TrySU->NodeNum
|
||||
<< " to SU #" << Copies.front()->NodeNum << "\n";
|
||||
AddPred(TrySU, SDep(Copies.front(), SDep::Order, /*Latency=*/1,
|
||||
|
@ -39,11 +39,11 @@ SUnit *ScheduleDAGSDNodes::Clone(SUnit *Old) {
|
||||
|
||||
/// CheckForPhysRegDependency - Check if the dependency between def and use of
|
||||
/// a specified operand is a physical register dependency. If so, returns the
|
||||
/// register.
|
||||
/// register and the cost of copying the register.
|
||||
static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
|
||||
const TargetRegisterInfo *TRI,
|
||||
const TargetInstrInfo *TII,
|
||||
unsigned &PhysReg) {
|
||||
unsigned &PhysReg, int &Cost) {
|
||||
if (Op != 2 || User->getOpcode() != ISD::CopyToReg)
|
||||
return;
|
||||
|
||||
@ -55,8 +55,12 @@ static void CheckForPhysRegDependency(SDNode *Def, SDNode *User, unsigned Op,
|
||||
if (Def->isMachineOpcode()) {
|
||||
const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
|
||||
if (ResNo >= II.getNumDefs() &&
|
||||
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg)
|
||||
II.ImplicitDefs[ResNo - II.getNumDefs()] == Reg) {
|
||||
PhysReg = Reg;
|
||||
const TargetRegisterClass *RC =
|
||||
TRI->getPhysicalRegisterRegClass(Reg, Def->getValueType(ResNo));
|
||||
Cost = RC->getCopyCost();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -179,10 +183,18 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
|
||||
bool isChain = OpVT == MVT::Other;
|
||||
|
||||
unsigned PhysReg = 0;
|
||||
int Cost = 1;
|
||||
// Determine if this is a physical register dependency.
|
||||
CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg);
|
||||
CheckForPhysRegDependency(OpN, N, i, TRI, TII, PhysReg, Cost);
|
||||
assert((PhysReg == 0 || !isChain) &&
|
||||
"Chain dependence via physreg data?");
|
||||
// FIXME: See ScheduleDAGSDNodes::EmitCopyFromReg. For now, scheduler
|
||||
// emits a copy from the physical register to a virtual register unless
|
||||
// it requires a cross class copy (cost < 0). That means we are only
|
||||
// treating "expensive to copy" register dependency as physical register
|
||||
// dependency. This may change in the future though.
|
||||
if (Cost >= 0)
|
||||
PhysReg = 0;
|
||||
SU->addPred(SDep(OpSU, isChain ? SDep::Order : SDep::Data,
|
||||
OpSU->Latency, PhysReg));
|
||||
}
|
||||
@ -252,10 +264,12 @@ unsigned ScheduleDAGSDNodes::ComputeMemOperandsEnd(SDNode *Node) {
|
||||
|
||||
|
||||
void ScheduleDAGSDNodes::dumpNode(const SUnit *SU) const {
|
||||
if (SU->getNode())
|
||||
SU->getNode()->dump(DAG);
|
||||
else
|
||||
cerr << "CROSS RC COPY ";
|
||||
if (!SU->getNode()) {
|
||||
cerr << "PHYS REG COPY\n";
|
||||
return;
|
||||
}
|
||||
|
||||
SU->getNode()->dump(DAG);
|
||||
cerr << "\n";
|
||||
SmallVector<SDNode *, 4> FlaggedNodes;
|
||||
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
|
||||
|
@ -629,6 +629,12 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
|
||||
|
||||
// For pre-regalloc scheduling, create instructions corresponding to the
|
||||
// SDNode and any flagged SDNodes and append them to the block.
|
||||
if (!SU->getNode()) {
|
||||
// Emit a copy.
|
||||
EmitPhysRegCopy(SU, CopyVRBaseMap);
|
||||
continue;
|
||||
}
|
||||
|
||||
SmallVector<SDNode *, 4> FlaggedNodes;
|
||||
for (SDNode *N = SU->getNode()->getFlaggedNode(); N; N = N->getFlaggedNode())
|
||||
FlaggedNodes.push_back(N);
|
||||
@ -636,10 +642,7 @@ MachineBasicBlock *ScheduleDAGSDNodes::EmitSchedule() {
|
||||
EmitNode(FlaggedNodes.back(), SU->OrigNode != SU, VRBaseMap);
|
||||
FlaggedNodes.pop_back();
|
||||
}
|
||||
if (!SU->getNode())
|
||||
EmitCrossRCCopy(SU, CopyVRBaseMap);
|
||||
else
|
||||
EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
|
||||
EmitNode(SU->getNode(), SU->OrigNode != SU, VRBaseMap);
|
||||
}
|
||||
|
||||
return BB;
|
||||
|
26
test/CodeGen/X86/pr3244.ll
Normal file
26
test/CodeGen/X86/pr3244.ll
Normal file
@ -0,0 +1,26 @@
|
||||
; RUN: llvm-as < %s | llc -march=x86
|
||||
; PR3244
|
||||
|
||||
@g_62 = external global i16 ; <i16*> [#uses=1]
|
||||
@g_487 = external global i32 ; <i32*> [#uses=1]
|
||||
|
||||
define i32 @func_42(i32 %p_43, i32 %p_44, i32 %p_45, i32 %p_46) nounwind {
|
||||
entry:
|
||||
%0 = load i16* @g_62, align 2 ; <i16> [#uses=1]
|
||||
%1 = load i32* @g_487, align 4 ; <i32> [#uses=1]
|
||||
%2 = trunc i16 %0 to i8 ; <i8> [#uses=1]
|
||||
%3 = trunc i32 %1 to i8 ; <i8> [#uses=1]
|
||||
%4 = tail call i32 (...)* @func_7(i64 -4455561449541442965, i32 1)
|
||||
nounwind ; <i32> [#uses=1]
|
||||
%5 = trunc i32 %4 to i8 ; <i8> [#uses=1]
|
||||
%6 = mul i8 %3, %2 ; <i8> [#uses=1]
|
||||
%7 = mul i8 %6, %5 ; <i8> [#uses=1]
|
||||
%8 = sext i8 %7 to i16 ; <i16> [#uses=1]
|
||||
%9 = tail call i32 @func_85(i16 signext %8, i32 1, i32 1) nounwind
|
||||
; <i32> [#uses=0]
|
||||
ret i32 undef
|
||||
}
|
||||
|
||||
declare i32 @func_7(...)
|
||||
|
||||
declare i32 @func_85(i16 signext, i32, i32)
|
Loading…
x
Reference in New Issue
Block a user