Fixup register kills after scheduling.

llvm-svn: 80002
This commit is contained in:
David Goodwin 2009-08-25 17:03:05 +00:00
parent b388728ba7
commit 047f69da86
4 changed files with 203 additions and 21 deletions

View File

@ -40,6 +40,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
#include <map>
#include <set>
using namespace llvm;
STATISTIC(NumNoops, "Number of noops inserted");
@ -140,6 +141,11 @@ namespace {
/// Schedule - Schedule the instruction range using list scheduling.
///
void Schedule();
/// FixupKills - Fix register kill flags that have been invalidated
/// by scheduling.
///
void FixupKills(MachineBasicBlock *MBB);
/// Observe - Update liveness information to account for the current
/// instruction, which will not be scheduled.
@ -150,6 +156,11 @@ namespace {
///
void FinishBlock();
/// GenerateLivenessForKills - If true, generate Def/Kill
/// information for use in updating register kill flags. If false,
/// generate Def/Kill information for anti-dependence breaking.
bool GenerateLivenessForKills;
private:
void PrescanInstruction(MachineInstr *MI);
void ScanInstruction(MachineInstr *MI, unsigned Count);
@ -202,6 +213,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
for (MachineFunction::iterator MBB = Fn.begin(), MBBe = Fn.end();
MBB != MBBe; ++MBB) {
// Initialize register live-range state for scheduling in this block.
Scheduler.GenerateLivenessForKills = false;
Scheduler.StartBlock(MBB);
// Schedule each sequence of instructions not interrupted by a label
@ -228,6 +240,12 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Clean up register live-range state.
Scheduler.FinishBlock();
// Initialize register live-range state again and update register kills
Scheduler.GenerateLivenessForKills = true;
Scheduler.StartBlock(MBB);
Scheduler.FixupKills(MBB);
Scheduler.FinishBlock();
}
return true;
@ -287,26 +305,28 @@ void SchedulePostRATDList::StartBlock(MachineBasicBlock *BB) {
}
}
// Consider callee-saved registers as live-out, since we're running after
// prologue/epilogue insertion so there's no way to add additional
// saved registers.
//
// TODO: If the callee saves and restores these, then we can potentially
// use them between the save and the restore. To do that, we could scan
// the exit blocks to see which of these registers are defined.
// Alternatively, callee-saved registers that aren't saved and restored
// could be marked live-in in every block.
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
if (!GenerateLivenessForKills) {
// Consider callee-saved registers as live-out, since we're running after
// prologue/epilogue insertion so there's no way to add additional
// saved registers.
//
// TODO: If the callee saves and restores these, then we can potentially
// use them between the save and the restore. To do that, we could scan
// the exit blocks to see which of these registers are defined.
// Alternatively, callee-saved registers that aren't saved and restored
// could be marked live-in in every block.
for (const unsigned *I = TRI->getCalleeSavedRegs(); *I; ++I) {
unsigned Reg = *I;
Classes[Reg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[Reg] = BB->size();
DefIndices[Reg] = ~0u;
// Repeat, for all aliases.
for (const unsigned *Alias = TRI->getAliasSet(Reg); *Alias; ++Alias) {
unsigned AliasReg = *Alias;
Classes[AliasReg] = reinterpret_cast<TargetRegisterClass *>(-1);
KillIndices[AliasReg] = BB->size();
DefIndices[AliasReg] = ~0u;
}
}
}
}
@ -467,11 +487,17 @@ void SchedulePostRATDList::ScanInstruction(MachineInstr *MI,
Classes[SubregReg] = 0;
RegRefs.erase(SubregReg);
}
// Conservatively mark super-registers as unusable.
// Conservatively mark super-registers as unusable. If
// initializing for kill updating, then mark all supers as defined
// as well.
for (const unsigned *Super = TRI->getSuperRegisters(Reg);
*Super; ++Super) {
unsigned SuperReg = *Super;
Classes[SuperReg] = reinterpret_cast<TargetRegisterClass *>(-1);
if (GenerateLivenessForKills) {
DefIndices[SuperReg] = Count;
KillIndices[SuperReg] = ~0u;
}
}
}
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
@ -753,6 +779,53 @@ bool SchedulePostRATDList::BreakAntiDependencies() {
return Changed;
}
/// FixupKills - Recompute the kill flag on every register use operand in
/// MBB. Instruction reordering may have left the existing flags stale.
///
/// The block is walked from its last instruction to its first. Count is
/// the index handed to ScanInstruction for the current instruction; it
/// starts at MBB->size() and is decremented once per instruction visited,
/// including the ones that are skipped.
///
void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
  DEBUG(errs() << "Fixup kills for BB ID#" << MBB->getNumber() << '\n');

  BitVector ReservedRegs = TRI->getReservedRegs(MF);

  // Registers already visited as a use in the instruction being processed.
  std::set<unsigned> SeenUses;

  MachineBasicBlock::iterator Pos = MBB->end();
  MachineBasicBlock::iterator Begin = MBB->begin();
  for (unsigned Count = MBB->size(); Pos != Begin; --Count) {
    --Pos;
    MachineInstr *MI = &*Pos;

    // After regalloc, IMPLICIT_DEF instructions aren't safe to treat as
    // dependence-breaking. For an INSERT_SUBREG the IMPLICIT_DEF stays
    // behind and looks like a clobber of the super-register even though
    // the subregister must remain live, so such instructions are ignored.
    if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
      continue;

    // Bring the liveness bookkeeping up to date for this instruction
    // before KillIndices is consulted below.
    PrescanInstruction(MI);
    ScanInstruction(MI, Count);

    // Visit each register use. If the same register appears in several
    // use operands, only the first one may carry the kill flag.
    SeenUses.clear();
    for (unsigned OpIdx = 0, NumOps = MI->getNumOperands();
         OpIdx != NumOps; ++OpIdx) {
      MachineOperand &Use = MI->getOperand(OpIdx);
      if (!(Use.isReg() && Use.isUse()))
        continue;

      unsigned Reg = Use.getReg();
      if (Reg == 0 || ReservedRegs.test(Reg))
        continue;

      // insert() reports whether Reg was newly added, i.e. whether this
      // is the first use of Reg in this instruction.
      bool FirstUse = SeenUses.insert(Reg).second;
      bool ShouldKill = FirstUse && (KillIndices[Reg] == Count);
      if (Use.isKill() == ShouldKill)
        continue;

      Use.setIsKill(ShouldKill);
      DEBUG(errs() << "Fixed " << Use << " in ");
      DEBUG(MI->dump());
    }
  }
}
//===----------------------------------------------------------------------===//
// Top-Down Scheduling
//===----------------------------------------------------------------------===//

View File

@ -0,0 +1,40 @@
; RUN: llvm-as < %s | llc -march=arm -mattr=+vfp2 -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-apple-darwin9"
%struct.tree = type { i32, double, double, %struct.tree*, %struct.tree*, %struct.tree*, %struct.tree* }
@g = common global %struct.tree* null
; @tsp - Loads a double through a null pointer in the entry block, then in
; bb4.i loads fields 1 and 2 of the %struct.tree that @g points to, forms
; (0.0 - field1)^2 + (nullLoad - field2)^2, and passes the sum to
; llvm.sqrt.f64. Both conditional branches test undef, and every path ends
; in 'unreachable', so the function never returns a value.
define arm_apcscc %struct.tree* @tsp(%struct.tree* %t, i32 %nproc) nounwind {
entry:
%t.idx51.val.i = load double* null ; <double> [#uses=1]
br i1 undef, label %bb4.i, label %bb.i
bb.i: ; preds = %entry
unreachable
bb4.i: ; preds = %entry
%0 = load %struct.tree** @g, align 4 ; <%struct.tree*> [#uses=2]
%.idx45.i = getelementptr %struct.tree* %0, i32 0, i32 1 ; <double*> [#uses=1]
%.idx45.val.i = load double* %.idx45.i ; <double> [#uses=1]
%.idx46.i = getelementptr %struct.tree* %0, i32 0, i32 2 ; <double*> [#uses=1]
%.idx46.val.i = load double* %.idx46.i ; <double> [#uses=1]
%1 = fsub double 0.000000e+00, %.idx45.val.i ; <double> [#uses=2]
%2 = fmul double %1, %1 ; <double> [#uses=1]
%3 = fsub double %t.idx51.val.i, %.idx46.val.i ; <double> [#uses=2]
%4 = fmul double %3, %3 ; <double> [#uses=1]
%5 = fadd double %2, %4 ; <double> [#uses=1]
%6 = tail call double @llvm.sqrt.f64(double %5) nounwind ; <double> [#uses=1]
br i1 undef, label %bb7.i4, label %bb6.i
bb6.i: ; preds = %bb4.i
br label %bb7.i4
bb7.i4: ; preds = %bb6.i, %bb4.i
%tton1.0.i = phi double [ %6, %bb6.i ], [ undef, %bb4.i ] ; <double> [#uses=0]
unreachable
}
declare double @llvm.sqrt.f64(double) nounwind readonly

View File

@ -0,0 +1,38 @@
; RUN: llvm-as < %s | llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-apple-darwin9"
%struct.anon = type { [3 x double], double, %struct.node*, [64 x %struct.bnode*], [64 x %struct.bnode*] }
%struct.bnode = type { i16, double, [3 x double], i32, i32, [3 x double], [3 x double], [3 x double], double, %struct.bnode*, %struct.bnode* }
%struct.icstruct = type { [3 x i32], i16 }
%struct.node = type { i16, double, [3 x double], i32, i32 }
declare arm_apcscc double @floor(double) nounwind readnone
; @intcoord - Branches on %a three times in a row. The false edges of the
; first two branches lead to 'unreachable' blocks. The false edge of the
; third leads to bb9, which calls floor(%b) and discards the result before
; falling into bb11. bb11 stores 0 into the first i32 of the sret
; aggregate %agg.result and returns.
define void @intcoord(%struct.icstruct* noalias nocapture sret %agg.result, i1 %a, double %b) {
entry:
br i1 %a, label %bb3, label %bb1
bb1: ; preds = %entry
unreachable
bb3: ; preds = %entry
br i1 %a, label %bb7, label %bb5
bb5: ; preds = %bb3
unreachable
bb7: ; preds = %bb3
br i1 %a, label %bb11, label %bb9
bb9: ; preds = %bb7
%0 = tail call arm_apcscc double @floor(double %b) nounwind readnone ; <double> [#uses=0]
br label %bb11
bb11: ; preds = %bb9, %bb7
%1 = getelementptr %struct.icstruct* %agg.result, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
store i32 0, i32* %1
ret void
}

View File

@ -0,0 +1,31 @@
; RUN: llvm-as < %s | llc -asm-verbose=false -O3 -relocation-model=pic -disable-fp-elim -mtriple=thumbv7-apple-darwin -mcpu=cortex-a8 -disable-post-RA-scheduler=0 -avoid-hazards
; ModuleID = '<stdin>'
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
target triple = "armv7-apple-darwin9"
%struct.Hosp = type { i32, i32, i32, %struct.List, %struct.List, %struct.List, %struct.List }
%struct.List = type { %struct.List*, %struct.Patient*, %struct.List* }
%struct.Patient = type { i32, i32, i32, %struct.Village* }
%struct.Village = type { [4 x %struct.Village*], %struct.Village*, %struct.List, %struct.Hosp, i32, i32 }
; @alloc_tree - Returns null when %p is true. Otherwise (bb1) it mallocs a
; %struct.Village, calls ldexp(1.0, %level), converts the result to i32 and
; stores it through a null pointer, then stores null into elements 0 and 2
; of the %struct.List at Village field 3 (the %struct.Hosp), sub-field 6,
; and returns the newly allocated Village.
define arm_apcscc %struct.Village* @alloc_tree(i32 %level, i32 %label, %struct.Village* %back, i1 %p) nounwind {
entry:
br i1 %p, label %bb8, label %bb1
bb1: ; preds = %entry
%0 = malloc %struct.Village ; <%struct.Village*> [#uses=3]
%exp2 = call double @ldexp(double 1.000000e+00, i32 %level) nounwind ; <double> [#uses=1]
%.c = fptosi double %exp2 to i32 ; <i32> [#uses=1]
store i32 %.c, i32* null
%1 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 0 ; <%struct.List**> [#uses=1]
store %struct.List* null, %struct.List** %1
%2 = getelementptr %struct.Village* %0, i32 0, i32 3, i32 6, i32 2 ; <%struct.List**> [#uses=1]
store %struct.List* null, %struct.List** %2
ret %struct.Village* %0
bb8: ; preds = %entry
ret %struct.Village* null
}
declare double @ldexp(double, i32)