mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-13 06:29:59 +00:00
Fixup register kills after scheduling.
llvm-svn: 80002
This commit is contained in:
parent
b388728ba7
commit
047f69da86
@ -40,6 +40,7 @@
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/ADT/Statistic.h"
|
||||
#include <map>
|
||||
#include <set>
|
||||
using namespace llvm;
|
||||
|
||||
STATISTIC(NumNoops, "Number of noops inserted");
|
||||
@ -140,6 +141,11 @@ namespace {
|
||||
/// Schedule - Schedule the instruction range using list scheduling.
|
||||
///
|
||||
void Schedule();
|
||||
|
||||
/// FixupKills - Fix register kill flags that have been made
|
||||
/// invalid due to scheduling
|
||||
///
|
||||
void FixupKills(MachineBasicBlock *MBB);
|
||||
|
||||
/// Observe - Update liveness information to account for the current
|
||||
/// instruction, which will not be scheduled.
|
||||
@ -150,6 +156,11 @@ namespace {
|
||||
///
|
||||
void FinishBlock();
|
||||
|
||||
/// GenerateLivenessForKills - If true then generate Def/Kill
|
||||
/// information for use in updating register kill flags. If false then
|
||||
/// generate Def/Kill information for anti-dependence breaking.
|
||||
bool GenerateLivenessForKills;
|
||||
|
||||
private:
|
||||
void PrescanInstruction(MachineInstr *MI);
|
||||
void ScanInstruction(MachineInstr *MI, unsigned Count);
|
||||
@ -202,6 +213,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
|
||||
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
|
||||
MBB != MBBe; ++MBB) {
|
||||
// Initialize register live-range state for scheduling in this block.
|
||||
Scheduler.GenerateLivenessForKills = false;
|
||||
Scheduler.StartBlock(MBB);
|
||||
|
||||
// Schedule each sequence of instructions not interrupted by a label
|
||||
@ -228,6 +240,12 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
|
||||
|
||||
// Clean up register live-range state.
|
||||
Scheduler.FinishBlock();
|
||||
|
||||
// Initialize register live-range state again and update register kills
|
||||
Scheduler.GenerateLivenessForKills = true;
|
||||
Scheduler.StartBlock(MBB);
|
||||
Scheduler.FixupKills(MBB);
|
||||
Scheduler.FinishBlock();
|
||||
}
|
||||
|
||||
return true;
|
||||
@ -287,26 +305,28 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
|
||||
}
|
||||
}
|
||||
|
||||
// Consider callee-saved registers as live-out, since we're running after
|
||||
// prologue/epilogue insertion so there's no way to add additional
|
||||
// saved registers.
|
||||
//
|
||||
// TODO: If the callee saves and restores these, then we can potentially
|
||||
// use them between the save and the restore. To do that, we could scan
|
||||
// the exit blocks to see which of these registers are defined.
|
||||
// Alternatively, callee-saved registers that aren't saved and restored
|
||||
// could be marked live-in in every block.
|
||||
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
|
||||
unsigned Reg = *I;
|
||||
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
|
||||
KillIndices[Reg] = BB->size();
|
||||
DefIndices[Reg] = ~0u;
|
||||
// Repeat, for all aliases.
|
||||
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
|
||||
unsigned AliasReg = *Alias;
|
||||
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
|
||||
KillIndices[AliasReg] = BB->size();
|
||||
DefIndices[AliasReg] = ~0u;
|
||||
if (!GenerateLivenessForKills) {
|
||||
// Consider callee-saved registers as live-out, since we're running after
|
||||
// prologue/epilogue insertion so there's no way to add additional
|
||||
// saved registers.
|
||||
//
|
||||
// TODO: If the callee saves and restores these, then we can potentially
|
||||
// use them between the save and the restore. To do that, we could scan
|
||||
// the exit blocks to see which of these registers are defined.
|
||||
// Alternatively, callee-saved registers that aren't saved and restored
|
||||
// could be marked live-in in every block.
|
||||
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
|
||||
unsigned Reg = *I;
|
||||
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
|
||||
KillIndices[Reg] = BB->size();
|
||||
DefIndices[Reg] = ~0u;
|
||||
// Repeat, for all aliases.
|
||||
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
|
||||
unsigned AliasReg = *Alias;
|
||||
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
|
||||
KillIndices[AliasReg] = BB->size();
|
||||
DefIndices[AliasReg] = ~0u;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -467,11 +487,17 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
|
||||
Classes[SubregReg] = 0;
|
||||
RegRefs.erase(SubregReg);
|
||||
}
|
||||
// Conservatively mark super-registers as unusable.
|
||||
// Conservatively mark super-registers as unusable. If
|
||||
// initializing for kill updating, then mark all supers as defined
|
||||
// as well.
|
||||
for (const unsigned *Super = TRI->getSuperRegisters(Reg);
|
||||
*Super; ++Super) {
|
||||
unsigned SuperReg = *Super;
|
||||
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
|
||||
if (GenerateLivenessForKills) {
|
||||
DefIndices[SuperReg] = Count;
|
||||
KillIndices[SuperReg] = ~0u;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
@ -753,6 +779,53 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
|
||||
return Changed;
|
||||
}
|
||||
|
||||
/// FixupKills - Fix the register kill flags, they may have been made
|
||||
/// incorrect by instruction reordering.
|
||||
///
|
||||
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
|
||||
DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');
|
||||
|
||||
std::set<unsigned> killedRegs;
|
||||
BitVector ReservedRegs = TRI->getReservedRegs(MF);
|
||||
|
||||
unsigned Count = MBB->size();
|
||||
for (MachineBasicBlock::iterator I = MBB->end(), E = MBB->begin();
|
||||
I != E; --Count) {
|
||||
MachineInstr *MI = --I;
|
||||
|
||||
// After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
|
||||
// dependence-breaking. In the case of an INSERT_SUBREG, the IMPLICIT_DEF
|
||||
// is left behind appearing to clobber the super-register, while the
|
||||
// subregister needs to remain live. So we just ignore them.
|
||||
if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
|
||||
continue;
|
||||
|
||||
PrescanInstruction(MI);
|
||||
ScanInstruction(MI, Count);
|
||||
|
||||
// Examine all used registers and set kill flag. When a register
|
||||
// is used multiple times we only set the kill flag on the first
|
||||
// use.
|
||||
killedRegs.clear();
|
||||
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
|
||||
MachineOperand &MO = MI->getOperand(i);
|
||||
if (!MO.isReg() || !MO.isUse()) continue;
|
||||
unsigned Reg = MO.getReg();
|
||||
if ((Reg == 0) || ReservedRegs.test(Reg)) continue;
|
||||
|
||||
bool kill = ((KillIndices[Reg] == Count) &&
|
||||
(killedRegs.find(Reg) == killedRegs.end()));
|
||||
if (MO.isKill() != kill) {
|
||||
MO.setIsKill(kill);
|
||||
DEBUG(errs() << "Fixed " << MO << " in ");
|
||||
DEBUG(MI->dump());
|
||||
}
|
||||
|
||||
killedRegs.insert(Reg);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// Top-Down Scheduling
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
40
test/CodeGen/ARM/2009-08-21-PostRAKill.ll
Normal file
40
test/CodeGen/ARM/2009-08-21-PostRAKill.ll
Normal file
@ -0,0 +1,40 @@
|
||||
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
|
||||
target triple = "armv7-apple-darwin9"
|
||||
|
||||
%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
|
||||
@g = common global %struct.tree* null
|
||||
|
||||
; @tsp - input function for this test. Judging by the file name
; (2009-08-21-PostRAKill.ll) it is presumably a reduced reproducer for
; post-RA scheduling kill flags -- TODO confirm. The function never returns:
; both terminal blocks (bb.i and bb7.i4) end in 'unreachable'.
define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
|
||||
entry:
|
||||
; Loads a double through a null pointer, then branches on an undef condition.
%t.idx51.val.i = load double* null ; <double> [#uses=1]
|
||||
br i1 undef, label %bb4.i, label %bb.i
|
||||
|
||||
bb.i: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
bb4.i: ; preds = %entry
|
||||
; Reads the two double fields (indices 1 and 2) of the tree that @g points to
; and computes sqrt((0.0 - field1)^2 + (%t.idx51.val.i - field2)^2).
%0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
|
||||
%.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
|
||||
%.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1]
|
||||
%.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
|
||||
%.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1]
|
||||
%1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2]
|
||||
%2 = fmul double %1, %1 ; <double> [#uses=1]
|
||||
%3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2]
|
||||
%4 = fmul double %3, %3 ; <double> [#uses=1]
|
||||
%5 = fadd double %2, %4 ; <double> [#uses=1]
|
||||
%6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1]
|
||||
br i1 undef, label %bb7.i4, label %bb6.i
|
||||
|
||||
bb6.i: ; preds = %bb4.i
|
||||
br label %bb7.i4
|
||||
|
||||
bb7.i4: ; preds = %bb6.i, %bb4.i
|
||||
; The phi merges the sqrt result with undef; its value has no uses.
%tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0]
|
||||
unreachable
|
||||
}
|
||||
|
||||
declare double @llvm.sqrt.f64(double) nounwind readonly
|
38
test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
Normal file
38
test/CodeGen/ARM/2009-08-21-PostRAKill2.ll
Normal file
@ -0,0 +1,38 @@
|
||||
; RUN: llvm-as < %s | llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
|
||||
target triple = "armv7-apple-darwin9"
|
||||
|
||||
%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
|
||||
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
|
||||
%struct.icstruct = type { [3 x i32], i16 }
|
||||
%struct.node = type { i16, double, [3 x double], i32, i32 }
|
||||
|
||||
declare arm_apcscc double @floor(double) nounwind readnone
|
||||
|
||||
; @intcoord - input function for this test. Judging by the file name
; (2009-08-21-PostRAKill2.ll) it is presumably a reduced reproducer for
; post-RA scheduling kill flags -- TODO confirm. The same i1 argument %a
; drives all three conditional branches; the false edges of the first two
; lead to 'unreachable' blocks.
define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
|
||||
entry:
|
||||
br i1 %a, label %bb3, label %bb1
|
||||
|
||||
bb1: ; preds = %entry
|
||||
unreachable
|
||||
|
||||
bb3: ; preds = %entry
|
||||
br i1 %a, label %bb7, label %bb5
|
||||
|
||||
bb5: ; preds = %bb3
|
||||
unreachable
|
||||
|
||||
bb7: ; preds = %bb3
|
||||
br i1 %a, label %bb11, label %bb9
|
||||
|
||||
bb9: ; preds = %bb7
|
||||
; Calls floor on %b; the result is not used.
%0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
|
||||
br label %bb11
|
||||
|
||||
bb11: ; preds = %bb9, %bb7
|
||||
; Stores 0 into element 0 of the [3 x i32] array at the start of the sret
; result, then returns.
%1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
|
||||
store i32 0, i32* %1
|
||||
ret void
|
||||
}
|
31
test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
Normal file
31
test/CodeGen/ARM/2009-08-21-PostRAKill3.ll
Normal file
@ -0,0 +1,31 @@
|
||||
; RUN: llvm-as < %s | llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
|
||||
target triple = "armv7-apple-darwin9"
|
||||
|
||||
%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
|
||||
%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
|
||||
%struct.Patient = type { i32, i32, i32, %struct.Village* }
|
||||
%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
|
||||
|
||||
; @alloc_tree - input function for this test. Judging by the file name
; (2009-08-21-PostRAKill3.ll) it is presumably a reduced reproducer for
; post-RA scheduling kill flags -- TODO confirm. Returns null when %p is
; true; otherwise returns a freshly malloc'ed %struct.Village.
define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
|
||||
entry:
|
||||
br i1 %p, label %bb8, label %bb1
|
||||
|
||||
bb1: ; preds = %entry
|
||||
%0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
|
||||
; ldexp(1.0, %level) converted to i32 and stored through a null pointer.
%exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
|
||||
%.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
|
||||
store i32 %.c, i32* null
|
||||
; Village field 3 is the Hosp, whose field 6 is its last List; the two stores
; below null out that List's fields 0 and 2 (both %struct.List*).
%1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
|
||||
store %struct.List* null, %struct.List** %1
|
||||
%2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
|
||||
store %struct.List* null, %struct.List** %2
|
||||
ret %struct.Village* %0
|
||||
|
||||
bb8: ; preds = %entry
|
||||
ret %struct.Village* null
|
||||
}
|
||||
|
||||
declare double @ldexp(double, i32)
|
Loading…
Reference in New Issue
Block a user