Insert IMPLICIT_DEFS for undef uses in tail merging

Tail merging can convert an undef use into a normal one when creating a
common tail. Doing so can make the register live out from a block which
previously contained the undef use. To keep the liveness up-to-date,
insert IMPLICIT_DEFs in such blocks when necessary.

To enable this patch the computeLiveIns() function which used to
compute live-ins for a block and set them immediately is split into new
functions:
- computeLiveIns() just computes the live-ins in a LivePhysRegs set.
- addLiveIns() applies the live-ins to a block live-in list.
- computeAndAddLiveIns() is a convenience function combining the other
  two functions and behaving like computeLiveIns() before this patch.

Based on a patch by Krzysztof Parzyszek <kparzysz@codeaurora.org>

Differential Revision: https://reviews.llvm.org/D37034

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@312668 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matthias Braun 2017-09-06 20:45:24 +00:00
parent 41915c0baa
commit 109c6e02f7
9 changed files with 240 additions and 93 deletions

View File

@ -159,12 +159,18 @@ inline raw_ostream &operator<<(raw_ostream &OS, const LivePhysRegs& LR) {
return OS;
}
/// \brief Computes the live-in list for \p MBB assuming all of its successors
/// live-in lists are up-to-date. Uses the given LivePhysReg instance \p
/// LiveRegs; This is just here to avoid repeated heap allocations when calling
/// this multiple times in a pass.
void computeLiveIns(LivePhysRegs &LiveRegs, const MachineRegisterInfo &MRI,
MachineBasicBlock &MBB);
/// \brief Computes registers live-in to \p MBB assuming all of its successors
/// live-in lists are up-to-date. Puts the result into the given LivePhysReg
/// instance \p LiveRegs.
void computeLiveIns(LivePhysRegs &LiveRegs, const MachineBasicBlock &MBB);
/// Adds registers contained in \p LiveRegs to the block live-in list of \p MBB.
/// Does not add reserved registers.
void addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs);
/// Convenience function combining computeLiveIns() and addLiveIns().
void computeAndAddLiveIns(LivePhysRegs &LiveRegs,
MachineBasicBlock &MBB);
} // end namespace llvm

View File

@ -31,6 +31,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@ -365,15 +366,37 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
return TailLen;
}
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
TII->ReplaceTailWithBranchTo(OldInst, NewDest);
void BranchFolder::replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock &NewDest) {
if (UpdateLiveIns) {
NewDest->clearLiveIns();
computeLiveIns(LiveRegs, *MRI, *NewDest);
// OldInst should always point to an instruction.
MachineBasicBlock &OldMBB = *OldInst->getParent();
LiveRegs.clear();
LiveRegs.addLiveOuts(OldMBB);
// Move backward to the place where will insert the jump.
MachineBasicBlock::iterator I = OldMBB.end();
do {
--I;
LiveRegs.stepBackward(*I);
} while (I != OldInst);
// Merging the tails may have switched some undef operand to non-undef ones.
// Add IMPLICIT_DEFS into OldMBB as necessary to have a definition of the
// register.
for (MachineBasicBlock::RegisterMaskPair P : NewDest.liveins()) {
// We computed the liveins with computeLiveIn earlier and should only see
// full registers:
assert(P.LaneMask == LaneBitmask::getAll() &&
"Can only handle full register.");
MCPhysReg Reg = P.PhysReg;
if (!LiveRegs.available(*MRI, Reg))
continue;
DebugLoc DL;
BuildMI(OldMBB, OldInst, DL, TII->get(TargetOpcode::IMPLICIT_DEF), Reg);
}
}
TII->ReplaceTailWithBranchTo(OldInst, &NewDest);
++NumTailMerge;
}
@ -408,7 +431,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
MBBFreqInfo.setBlockFreq(NewMBB, MBBFreqInfo.getBlockFreq(&CurMBB));
if (UpdateLiveIns)
computeLiveIns(LiveRegs, *MRI, *NewMBB);
computeAndAddLiveIns(LiveRegs, *NewMBB);
// Add the new block to the funclet.
const auto &FuncletI = FuncletMembership.find(&CurMBB);
@ -766,43 +789,6 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
return true;
}
void BranchFolder::MergeCommonTailDebugLocs(unsigned commonTailIndex) {
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
if (i != commonTailIndex)
NextCommonInsts[i] = SameTails[i].getTailStartPos();
else {
assert(SameTails[i].getTailStartPos() == MBB->begin() &&
"MBB is not a common tail only block");
}
}
for (auto &MI : *MBB) {
if (MI.isDebugValue())
continue;
DebugLoc DL = MI.getDebugLoc();
for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
if (i == commonTailIndex)
continue;
auto &Pos = NextCommonInsts[i];
assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail");
while (Pos->isDebugValue()) {
++Pos;
assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail");
}
assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
NextCommonInsts[i] = ++Pos;
}
MI.setDebugLoc(DL);
}
}
static void
mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
MachineBasicBlock &MBBCommon) {
@ -853,6 +839,67 @@ mergeOperations(MachineBasicBlock::iterator MBBIStartPos,
}
}
void BranchFolder::mergeCommonTails(unsigned commonTailIndex) {
MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
std::vector<MachineBasicBlock::iterator> NextCommonInsts(SameTails.size());
for (unsigned int i = 0 ; i != SameTails.size() ; ++i) {
if (i != commonTailIndex) {
NextCommonInsts[i] = SameTails[i].getTailStartPos();
mergeOperations(SameTails[i].getTailStartPos(), *MBB);
} else {
assert(SameTails[i].getTailStartPos() == MBB->begin() &&
"MBB is not a common tail only block");
}
}
for (auto &MI : *MBB) {
if (MI.isDebugValue())
continue;
DebugLoc DL = MI.getDebugLoc();
for (unsigned int i = 0 ; i < NextCommonInsts.size() ; i++) {
if (i == commonTailIndex)
continue;
auto &Pos = NextCommonInsts[i];
assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail");
while (Pos->isDebugValue()) {
++Pos;
assert(Pos != SameTails[i].getBlock()->end() &&
"Reached BB end within common tail");
}
assert(MI.isIdenticalTo(*Pos) && "Expected matching MIIs!");
DL = DILocation::getMergedLocation(DL, Pos->getDebugLoc());
NextCommonInsts[i] = ++Pos;
}
MI.setDebugLoc(DL);
}
if (UpdateLiveIns) {
LivePhysRegs NewLiveIns(*TRI);
computeLiveIns(NewLiveIns, *MBB);
// The flag merging may lead to some register uses no longer using the
// <undef> flag, add IMPLICIT_DEFs in the predecessors as necessary.
for (MachineBasicBlock *Pred : MBB->predecessors()) {
LiveRegs.init(*TRI);
LiveRegs.addLiveOuts(*Pred);
MachineBasicBlock::iterator InsertBefore = Pred->getFirstTerminator();
for (unsigned Reg : NewLiveIns) {
if (!LiveRegs.available(*MRI, Reg))
continue;
DebugLoc DL;
BuildMI(*Pred, InsertBefore, DL, TII->get(TargetOpcode::IMPLICIT_DEF),
Reg);
}
}
MBB->clearLiveIns();
addLiveIns(*MBB, NewLiveIns);
}
}
// See if any of the blocks in MergePotentials (which all have SuccBB as a
// successor, or all have no successor if it is null) can be tail-merged.
// If there is a successor, any blocks in MergePotentials that are not
@ -955,8 +1002,9 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
// Recompute common tail MBB's edge weights and block frequency.
setCommonTailEdgeWeights(*MBB);
// Merge debug locations across identical instructions for common tail.
MergeCommonTailDebugLocs(commonTailIndex);
// Merge debug locations, MMOs and undef flags across identical instructions
// for common tail.
mergeCommonTails(commonTailIndex);
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
@ -967,10 +1015,8 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
continue;
DEBUG(dbgs() << "BB#" << SameTails[i].getBlock()->getNumber()
<< (i == e-1 ? "" : ", "));
// Merge operations (MMOs, undef flags)
mergeOperations(SameTails[i].getTailStartPos(), *MBB);
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
replaceTailWithBranchTo(SameTails[i].getTailStartPos(), *MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
MergePotentials.erase(SameTails[i].getMPIter());
}

View File

@ -146,8 +146,8 @@ namespace llvm {
/// Delete the instruction OldInst and everything after it, replacing it
/// with an unconditional branch to NewDest.
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
void replaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock &NewDest);
/// Given a machine basic block and an iterator into it, split the MBB so
/// that the part before the iterator falls into the part starting at the
@ -182,8 +182,8 @@ namespace llvm {
unsigned &commonTailIndex);
/// Create merged DebugLocs of identical instructions across SameTails and
/// assign it to the instruction in common tail.
void MergeCommonTailDebugLocs(unsigned commonTailIndex);
/// assign it to the instruction in common tail; merge MMOs and undef flags.
void mergeCommonTails(unsigned commonTailIndex);
bool OptimizeBranches(MachineFunction &MF);

View File

@ -259,7 +259,7 @@ MachineBasicBlock *BranchRelaxation::splitBlockBeforeInstr(MachineInstr &MI,
// Need to fix live-in lists if we track liveness.
if (TRI->trackLivenessAfterRegAlloc(*MF))
computeLiveIns(LiveRegs, MF->getRegInfo(), *NewBB);
computeAndAddLiveIns(LiveRegs, *NewBB);
++NumSplit;
@ -348,7 +348,7 @@ bool BranchRelaxation::fixupConditionalBranch(MachineInstr &MI) {
// Need to fix live-in lists if we track liveness.
if (TRI->trackLivenessAfterRegAlloc(*MF))
computeLiveIns(LiveRegs, MF->getRegInfo(), NewBB);
computeAndAddLiveIns(LiveRegs, NewBB);
}
// We now have an appropriate fall-through block in place (either naturally or

View File

@ -218,16 +218,22 @@ void LivePhysRegs::addLiveIns(const MachineBasicBlock &MBB) {
}
void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
const MachineRegisterInfo &MRI,
MachineBasicBlock &MBB) {
const MachineBasicBlock &MBB) {
const MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
assert(MBB.livein_empty());
LiveRegs.init(TRI);
LiveRegs.addLiveOutsNoPristines(MBB);
for (MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
for (const MachineInstr &MI : make_range(MBB.rbegin(), MBB.rend()))
LiveRegs.stepBackward(MI);
}
for (unsigned Reg : LiveRegs) {
void llvm::addLiveIns(MachineBasicBlock &MBB, const LivePhysRegs &LiveRegs) {
assert(MBB.livein_empty() && "Expected empty live-in list");
const MachineFunction &MF = *MBB.getParent();
const MachineRegisterInfo &MRI = MF.getRegInfo();
const TargetRegisterInfo &TRI = *MRI.getTargetRegisterInfo();
for (MCPhysReg Reg : LiveRegs) {
if (MRI.isReserved(Reg))
continue;
// Skip the register if we are about to add one of its super registers.
@ -243,3 +249,9 @@ void llvm::computeLiveIns(LivePhysRegs &LiveRegs,
MBB.addLiveIn(Reg);
}
}
void llvm::computeAndAddLiveIns(LivePhysRegs &LiveRegs,
MachineBasicBlock &MBB) {
computeLiveIns(LiveRegs, MBB);
addLiveIns(MBB, LiveRegs);
}

View File

@ -672,16 +672,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP(
MI.eraseFromParent();
// Recompute livein lists.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
computeLiveIns(LiveRegs, MRI, *DoneBB);
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *DoneBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
// Do an extra pass around the loop to get loop carried registers right.
StoreBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
LoadCmpBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
return true;
}
@ -766,16 +765,15 @@ bool AArch64ExpandPseudo::expandCMP_SWAP_128(
MI.eraseFromParent();
// Recompute liveness bottom up.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
computeLiveIns(LiveRegs, MRI, *DoneBB);
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *DoneBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
// Do an extra pass in the loop to get the loop carried dependencies right.
StoreBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
LoadCmpBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
return true;
}

View File

@ -860,16 +860,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
MI.eraseFromParent();
// Recompute livein lists.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
computeLiveIns(LiveRegs, MRI, *DoneBB);
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *DoneBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
// Do an extra pass around the loop to get loop carried registers right.
StoreBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
LoadCmpBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
return true;
}
@ -980,16 +979,15 @@ bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
MI.eraseFromParent();
// Recompute livein lists.
const MachineRegisterInfo &MRI = MBB.getParent()->getRegInfo();
LivePhysRegs LiveRegs;
computeLiveIns(LiveRegs, MRI, *DoneBB);
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *DoneBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
// Do an extra pass around the loop to get loop carried registers right.
StoreBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *StoreBB);
computeAndAddLiveIns(LiveRegs, *StoreBB);
LoadCmpBB->clearLiveIns();
computeLiveIns(LiveRegs, MRI, *LoadCmpBB);
computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
return true;
}

View File

@ -0,0 +1,87 @@
# RUN: llc -march=hexagon -run-pass branch-folder %s -o - -verify-machineinstrs | FileCheck %s
# Branch folding will perform tail merging of bb.1 and bb.2, and bb.2 will
# become the common tail. The use of R0 in bb.2 is <undef> while the
# corresponding use in bb.1 is not. The common tail will have the <undef>
# flag removed, which will cause R0 to become a live-in to bb.2. The problem
# is that R0 is not live-out from all predecessors of bb.2, namely is not
# live-out from bb.0. To remedy that, the branch folder should add an
# IMPLICIT_DEF to that block.
# CHECK-LABEL: name: func0
# CHECK-LABEL: bb.0:
# CHECK: %r0 = IMPLICIT_DEF
# CHECK-LABEL: bb.1:
# CHECK-LABEL: bb.2:
# CHECK: liveins: %r0
# CHECK: PS_storerhabs 0, %r0
# CHECK: PS_jmpret
---
name: func0
tracksRegLiveness: true
body: |
bb.0:
liveins: %r31
successors: %bb.1, %bb.2
J2_jumpt undef %p0, %bb.2, implicit-def %pc
J2_jump %bb.1, implicit-def %pc
bb.1:
liveins: %r31
successors: %bb.3
%r0 = L2_loadruh_io undef %r1, 0
PS_storerhabs 0, killed %r0
J2_jump %bb.3, implicit-def %pc
bb.2:
liveins: %r31
successors: %bb.3
PS_storerhabs 0, undef %r0
J2_jump %bb.3, implicit-def %pc
bb.3:
liveins: %r31
PS_jmpret killed %r31, implicit-def %pc
...
---
# CHECK-LABEL: name: func1
# CHECK-LABEL: bb.1:
# CHECK: %r0 = IMPLICIT_DEF
# CHECK-LABEL: bb.2:
# CHECK-LABEL: bb.3:
# CHECK: liveins: %r0
# CHECK: PS_storerhabs 0, killed %r0
# CHECK: PS_jmpret
name: func1
tracksRegLiveness: true
body: |
bb.0:
liveins: %r31
successors: %bb.1, %bb.2
J2_jumpt undef %p0, %bb.2, implicit-def %pc
J2_jump %bb.1, implicit-def %pc
bb.1:
liveins: %r31
successors: %bb.3
%r1 = A2_tfrsi 1
PS_storerhabs 0, undef %r0
%r0 = A2_tfrsi 1
J2_jump %bb.3, implicit-def %pc
bb.2:
liveins: %r31
successors: %bb.3
%r0 = L2_loadruh_io undef %r1, 0
PS_storerhabs 0, killed %r0
%r0 = A2_tfrsi 1
J2_jump %bb.3, implicit-def %pc
bb.3:
liveins: %r31
PS_jmpret killed %r31, implicit undef %r0, implicit-def %pc
...

View File

@ -13,13 +13,13 @@ tracksRegLiveness: true
body: |
bb.0:
liveins: %p2, %r0
liveins: %p0:0x1, %p2, %r0
successors: %bb.1, %bb.2
J2_jumpt killed %p2, %bb.1, implicit-def %pc
J2_jump %bb.2, implicit-def %pc
bb.1:
liveins: %r0, %r19
liveins: %p0:0x1, %r0, %r19
successors: %bb.3
%r2 = A2_tfrsi 4
%r1 = COPY %r19
@ -28,7 +28,7 @@ body: |
J2_jump %bb.3, implicit-def %pc
bb.2:
liveins: %r0, %r18
liveins: %p0:0x1, %r0, %r18
successors: %bb.3
%r2 = A2_tfrsi 5
%r1 = L2_loadrh_io %r18, 0