The check for coalescing a virtual register to a physical register, e.g.

cl = EXTRACT_SUBREG reg1024, 1, is overly conservative. It should check
for overlaps of vr's live interval with the super registers of the
physical register (ECX in this case) and let JoinIntervals() handle checking
the coalescing feasibility against the physical register (cl in this case).

llvm-svn: 98251
This commit is contained in:
Evan Cheng 2010-03-11 08:20:21 +00:00
parent 9f2c113fab
commit 4ef6d8fa15
4 changed files with 61 additions and 15 deletions

View File

@ -127,11 +127,11 @@ namespace llvm {
bool conflictsWithPhysReg(const LiveInterval &li, VirtRegMap &vrm,
unsigned reg);
/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
/// it can check use as well.
bool conflictsWithPhysRegRef(LiveInterval &li, unsigned Reg,
bool CheckUse,
SmallPtrSet<MachineInstr*,32> &JoinedCopies);
/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
/// it checks for sub-register reference and it can check use as well.
bool conflictsWithSubPhysRegRef(LiveInterval &li, unsigned Reg,
bool CheckUse,
SmallPtrSet<MachineInstr*,32> &JoinedCopies);
// Interval creation
LiveInterval &getOrCreateInterval(unsigned reg) {

View File

@ -218,9 +218,9 @@ bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
return false;
}
/// conflictsWithPhysRegRef - Similar to conflictsWithPhysRegRef except
/// it can check use as well.
bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
/// conflictsWithSubPhysRegRef - Similar to conflictsWithPhysRegRef except
/// it checks for sub-register reference and it can check use as well.
bool LiveIntervals::conflictsWithSubPhysRegRef(LiveInterval &li,
unsigned Reg, bool CheckUse,
SmallPtrSet<MachineInstr*,32> &JoinedCopies) {
for (LiveInterval::Ranges::const_iterator

View File

@ -1260,10 +1260,10 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
RealDstReg = tri_->getMatchingSuperReg(DstReg, SubIdx, RC);
assert(RealDstReg && "Invalid extract_subreg instruction!");
LiveInterval &RHS = li_->getInterval(SrcReg);
// For this type of EXTRACT_SUBREG, conservatively
// check if the live interval of the source register interfere with the
// actual super physical register we are trying to coalesce with.
LiveInterval &RHS = li_->getInterval(SrcReg);
if (li_->hasInterval(RealDstReg) &&
RHS.overlaps(li_->getInterval(RealDstReg))) {
DEBUG({
@ -1273,7 +1273,11 @@ SimpleRegisterCoalescing::CanJoinExtractSubRegToPhysReg(unsigned DstReg,
return false; // Not coalescable
}
for (const unsigned* SR = tri_->getSubRegisters(RealDstReg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
// Do not check DstReg or its sub-register. JoinIntervals() will take care
// of that.
if (*SR != DstReg &&
!tri_->isSubRegister(DstReg, *SR) &&
li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
dbgs() << "Interfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
@ -1294,9 +1298,9 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
RealSrcReg = tri_->getMatchingSuperReg(SrcReg, SubIdx, RC);
assert(RealSrcReg && "Invalid extract_subreg instruction!");
LiveInterval &RHS = li_->getInterval(DstReg);
LiveInterval &LHS = li_->getInterval(DstReg);
if (li_->hasInterval(RealSrcReg) &&
RHS.overlaps(li_->getInterval(RealSrcReg))) {
LHS.overlaps(li_->getInterval(RealSrcReg))) {
DEBUG({
dbgs() << "Interfere with register ";
li_->getInterval(RealSrcReg).print(dbgs(), tri_);
@ -1304,7 +1308,11 @@ SimpleRegisterCoalescing::CanJoinInsertSubRegToPhysReg(unsigned DstReg,
return false; // Not coalescable
}
for (const unsigned* SR = tri_->getSubRegisters(RealSrcReg); *SR; ++SR)
if (li_->hasInterval(*SR) && RHS.overlaps(li_->getInterval(*SR))) {
// Do not check SrcReg or its sub-register. JoinIntervals() will take care
// of that.
if (*SR != SrcReg &&
!tri_->isSubRegister(SrcReg, *SR) &&
li_->hasInterval(*SR) && LHS.overlaps(li_->getInterval(*SR))) {
DEBUG({
dbgs() << "Interfere with sub-register ";
li_->getInterval(*SR).print(dbgs(), tri_);
@ -1476,6 +1484,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
return false; // Not coalescable.
}
// FIXME: The following checks are somewhat conservative. Perhaps a better
// way to implement this is to treat this as coalescing a vr with the
// super physical register.
if (isExtSubReg) {
if (!CanJoinExtractSubRegToPhysReg(DstReg, SrcReg, SubIdx, RealDstReg))
return false; // Not coalescable
@ -2205,7 +2216,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
li_->intervalIsInOneMBB(RHS) &&
li_->getApproximateInstructionCount(RHS) <= 10) {
// Perform a more exhaustive check for some common cases.
if (li_->conflictsWithPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
if (li_->conflictsWithSubPhysRegRef(RHS, LHS.reg, true, JoinedCopies))
return false;
} else {
for (const unsigned* SR = tri_->getSubRegisters(LHS.reg); *SR; ++SR)
@ -2222,7 +2233,7 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
if (LHS.containsOneValue() &&
li_->getApproximateInstructionCount(LHS) <= 10) {
// Perform a more exhaustive check for some common cases.
if (li_->conflictsWithPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
if (li_->conflictsWithSubPhysRegRef(LHS, RHS.reg, false, JoinedCopies))
return false;
} else {
for (const unsigned* SR = tri_->getSubRegisters(RHS.reg); *SR; ++SR)

View File

@ -0,0 +1,35 @@
; RUN: llc < %s -mtriple=i386-apple-darwin | FileCheck %s
; rdar://5571034
define void @foo(i32* nocapture %quadrant, i32* nocapture %ptr, i32 %bbSize, i32 %bbStart, i32 %shifts) nounwind ssp {
; CHECK: foo:
entry:
%j.03 = add i32 %bbSize, -1 ; <i32> [#uses=2]
%0 = icmp sgt i32 %j.03, -1 ; <i1> [#uses=1]
br i1 %0, label %bb.nph, label %return
bb.nph: ; preds = %entry
%tmp9 = add i32 %bbStart, %bbSize ; <i32> [#uses=1]
%tmp10 = add i32 %tmp9, -1 ; <i32> [#uses=1]
br label %bb
bb: ; preds = %bb, %bb.nph
; CHECK: %bb
; CHECK-NOT: movb {{.*}}l, %cl
; CHECK: sarl %cl
%indvar = phi i32 [ 0, %bb.nph ], [ %indvar.next, %bb ] ; <i32> [#uses=3]
%j.06 = sub i32 %j.03, %indvar ; <i32> [#uses=1]
%tmp11 = sub i32 %tmp10, %indvar ; <i32> [#uses=1]
%scevgep = getelementptr i32* %ptr, i32 %tmp11 ; <i32*> [#uses=1]
%1 = load i32* %scevgep, align 4 ; <i32> [#uses=1]
%2 = ashr i32 %j.06, %shifts ; <i32> [#uses=1]
%3 = and i32 %2, 65535 ; <i32> [#uses=1]
%4 = getelementptr inbounds i32* %quadrant, i32 %1 ; <i32*> [#uses=1]
store i32 %3, i32* %4, align 4
%indvar.next = add i32 %indvar, 1 ; <i32> [#uses=2]
%exitcond = icmp eq i32 %indvar.next, %bbSize ; <i1> [#uses=1]
br i1 %exitcond, label %return, label %bb
return: ; preds = %bb, %entry
ret void
}