From 02b66c3a32e9e282654666d0fcac10c10d399f18 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 10 Mar 2008 08:11:32 +0000 Subject: [PATCH] - Fix a subtle bug in RemoveCopyByCommutingDef. ALR is the live range where the source is defined; BLR is the live range which is defined by the copy. If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. A = or A, B ... B = A ... C = A ... = B then do not add kills of A to the newly created B interval. - Also fix some kill info update bug. llvm-svn: 48141 --- lib/CodeGen/SimpleRegisterCoalescing.cpp | 59 +++++++++++++++++++++--- lib/CodeGen/SimpleRegisterCoalescing.h | 4 ++ test/CodeGen/X86/coalescer-commute5.ll | 21 +++++++++ 3 files changed, 77 insertions(+), 7 deletions(-) create mode 100644 test/CodeGen/X86/coalescer-commute5.ll diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index d7c2d962b5a..65c8a5b99b1 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -310,11 +310,26 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); NewMI->getOperand(OpIdx).setIsKill(); - // Update uses of IntA of the specific Val# with IntB. bool BHasPHIKill = BValNo->hasPHIKill; SmallVector BDeadValNos; SmallVector BKills; std::map BExtend; + + // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. + // A = or A, B + // ... + // B = A + // ... + // C = A + // ... + // = B + // + // then do not add kills of A to the newly created B interval. + bool Extended = BLR->end > ALR->end && ALR->end != ALR->start; + if (Extended) + BExtend[ALR->end] = BLR->end; + + // Update uses of IntA of the specific Val# with IntB. 
for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(IntA.reg), UE = mri_->use_end(); UI != UE;) { MachineOperand &UseMO = UI.getOperand(); @@ -329,8 +344,12 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, UseMO.setReg(NewReg); if (UseMI == CopyMI) continue; - if (UseMO.isKill()) - BKills.push_back(li_->getUseIndex(UseIdx)+1); + if (UseMO.isKill()) { + if (Extended) + UseMO.setIsKill(false); + else + BKills.push_back(li_->getUseIndex(UseIdx)+1); + } unsigned SrcReg, DstReg; if (!tii_->isMoveInstr(*UseMI, SrcReg, DstReg)) continue; @@ -347,9 +366,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA, JoinedCopies.insert(UseMI); // If this is a kill but it's going to be removed, the last use // of the same val# is the new kill. - if (UseMO.isKill()) { + if (UseMO.isKill()) BKills.pop_back(); - } } } @@ -451,6 +469,29 @@ SimpleRegisterCoalescing::UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, } } +/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate +/// due to live range lengthening as the result of coalescing. +void SimpleRegisterCoalescing::RemoveUnnecessaryKills(unsigned Reg, + LiveInterval &LI) { + for (MachineRegisterInfo::use_iterator UI = mri_->use_begin(Reg), + UE = mri_->use_end(); UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + if (UseMO.isKill()) { + MachineInstr *UseMI = UseMO.getParent(); + unsigned SReg, DReg; + if (!tii_->isMoveInstr(*UseMI, SReg, DReg)) + continue; + unsigned UseIdx = li_->getUseIndex(li_->getInstructionIndex(UseMI)); + if (JoinedCopies.count(UseMI)) + continue; + LiveInterval::const_iterator UI = LI.FindLiveRangeContaining(UseIdx); + assert(UI != LI.end()); + if (!LI.isKill(UI->valno, UseIdx+1)) + UseMO.setIsKill(false); + } + } +} + /// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially /// extended by a dead copy. Mark the last use (if any) of the val# as kill /// as ends the live range there. 
If there isn't another use, then this @@ -803,6 +844,12 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { // Remember to delete the copy instruction. JoinedCopies.insert(CopyMI); + // Some live range has been lengthened due to coalescing, eliminate the + // unnecessary kills. + RemoveUnnecessaryKills(SrcReg, *ResDstInt); + if (TargetRegisterInfo::isVirtualRegister(DstReg)) + RemoveUnnecessaryKills(DstReg, *ResDstInt); + // SrcReg is guarateed to be the register whose live interval that is // being merged. li_->removeInterval(SrcReg); @@ -1481,8 +1528,6 @@ SimpleRegisterCoalescing::lastRegisterUse(unsigned Start, unsigned End, } -/// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate -/// due to live range lengthening as the result of coalescing. void SimpleRegisterCoalescing::printRegName(unsigned reg) const { if (TargetRegisterInfo::isPhysicalRegister(reg)) cerr << tri_->getName(reg); diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h index 543d4704d10..cf204a538cf 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.h +++ b/lib/CodeGen/SimpleRegisterCoalescing.h @@ -201,6 +201,10 @@ namespace llvm { /// subregister. void UpdateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx); + /// RemoveUnnecessaryKills - Remove kill markers that are no longer accurate + /// due to live range lengthening as the result of coalescing. + void RemoveUnnecessaryKills(unsigned Reg, LiveInterval &LI); + /// ShortenDeadCopyLiveRange - Shorten a live range as it's artificially /// extended by a dead copy. Mark the last use (if any) of the val# as kill /// as ends the live range there. 
If there isn't another use, then this diff --git a/test/CodeGen/X86/coalescer-commute5.ll b/test/CodeGen/X86/coalescer-commute5.ll new file mode 100644 index 00000000000..c730ea76e98 --- /dev/null +++ b/test/CodeGen/X86/coalescer-commute5.ll @@ -0,0 +1,21 @@ +; RUN: llvm-as < %s | llc -mtriple=i686-apple-darwin -mattr=+sse2 | not grep movaps + +define i32 @t() { +entry: + br i1 true, label %bb1664, label %bb1656 +bb1656: ; preds = %entry + ret i32 0 +bb1664: ; preds = %entry + %tmp4297 = bitcast <16 x i8> zeroinitializer to <2 x i64> ; <<2 x i64>> [#uses=2] + %tmp4351 = call <16 x i8> @llvm.x86.sse2.pcmpeq.b( <16 x i8> zeroinitializer, <16 x i8> zeroinitializer ) nounwind readnone ; <<16 x i8>> [#uses=0] + br i1 false, label %bb5310, label %bb4743 +bb4743: ; preds = %bb1664 + %tmp4360.not28 = or <2 x i64> zeroinitializer, %tmp4297 ; <<2 x i64>> [#uses=1] + br label %bb5310 +bb5310: ; preds = %bb4743, %bb1664 + %tmp4360.not28.pn = phi <2 x i64> [ %tmp4360.not28, %bb4743 ], [ %tmp4297, %bb1664 ] ; <<2 x i64>> [#uses=1] + %tmp4415.not.pn = or <2 x i64> zeroinitializer, %tmp4360.not28.pn ; <<2 x i64>> [#uses=0] + ret i32 0 +} + +declare <16 x i8> @llvm.x86.sse2.pcmpeq.b(<16 x i8>, <16 x i8>) nounwind readnone