mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-12 19:18:48 +00:00
[WebAssembly] Use dominator information to improve BLOCK placement
Always starting blocks at the top of their containing loops works, but creates unnecessarily deep nesting because it makes all blocks in a loop overlap. Refine the BLOCK placement algorithm to start blocks at nearest common dominating points instead, which significantly shrinks them and reduces overlapping. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@253876 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
8452e84d36
commit
96bd946285
@ -29,6 +29,7 @@
|
||||
#include "MCTargetDesc/WebAssemblyMCTargetDesc.h"
|
||||
#include "WebAssemblySubtarget.h"
|
||||
#include "llvm/ADT/SCCIterator.h"
|
||||
#include "llvm/CodeGen/MachineDominators.h"
|
||||
#include "llvm/CodeGen/MachineFunction.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineLoopInfo.h"
|
||||
@ -47,6 +48,8 @@ class WebAssemblyCFGStackify final : public MachineFunctionPass {
|
||||
|
||||
void getAnalysisUsage(AnalysisUsage &AU) const override {
|
||||
AU.setPreservesCFG();
|
||||
AU.addRequired<MachineDominatorTree>();
|
||||
AU.addPreserved<MachineDominatorTree>();
|
||||
AU.addRequired<MachineLoopInfo>();
|
||||
AU.addPreserved<MachineLoopInfo>();
|
||||
MachineFunctionPass::getAnalysisUsage(AU);
|
||||
@ -107,6 +110,11 @@ struct POStackEntry {
|
||||
};
|
||||
} // end anonymous namespace
|
||||
|
||||
static bool LoopContains(const MachineLoop *Loop,
|
||||
const MachineBasicBlock *MBB) {
|
||||
return Loop ? Loop->contains(MBB) : true;
|
||||
}
|
||||
|
||||
POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
|
||||
const MachineLoopInfo &MLI)
|
||||
: MBB(MBB), Succs(MBB->successors()) {
|
||||
@ -124,11 +132,11 @@ POStackEntry::POStackEntry(MachineBasicBlock *MBB, MachineFunction &MF,
|
||||
|
||||
// Keep loops contiguous by preferring the block that's in the same
|
||||
// loop.
|
||||
MachineLoop *LoopA = MLI.getLoopFor(A);
|
||||
MachineLoop *LoopB = MLI.getLoopFor(B);
|
||||
if (LoopA == Loop && LoopB != Loop)
|
||||
bool LoopContainsA = LoopContains(Loop, A);
|
||||
bool LoopContainsB = LoopContains(Loop, B);
|
||||
if (LoopContainsA && !LoopContainsB)
|
||||
return true;
|
||||
if (LoopA != Loop && LoopB == Loop)
|
||||
if (!LoopContainsA && LoopContainsB)
|
||||
return false;
|
||||
|
||||
// Minimize perturbation by preferring the block which is the immediate
|
||||
@ -192,12 +200,17 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
|
||||
#ifndef NDEBUG
|
||||
for (auto &MBB : MF)
|
||||
if (MachineLoop *Loop = MLI.getLoopFor(&MBB)) {
|
||||
// Assert that loops are contiguous.
|
||||
assert(Loop->getHeader() == Loop->getTopBlock());
|
||||
assert((Loop->getHeader() == &MBB ||
|
||||
Loop->contains(
|
||||
MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB))))) &&
|
||||
"Loop isn't contiguous");
|
||||
// Assert that all containing loops are contiguous.
|
||||
for (MachineLoop *L = Loop; L; L = L->getParentLoop()) {
|
||||
if (&MBB == L->getHeader()) {
|
||||
assert(&MBB == L->getTopBlock());
|
||||
} else {
|
||||
assert(&MBB != L->getTopBlock());
|
||||
assert(L->contains(
|
||||
MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) &&
|
||||
"Loop isn't contiguous");
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Assert that non-loops have no backedge predecessors.
|
||||
for (auto Pred : MBB.predecessors())
|
||||
@ -207,45 +220,68 @@ static void SortBlocks(MachineFunction &MF, const MachineLoopInfo &MLI) {
|
||||
#endif
|
||||
}
|
||||
|
||||
/// Insert BLOCK markers at appropriate places.
|
||||
static void PlaceBlockMarkers(MachineBasicBlock &MBB, MachineBasicBlock &Succ,
|
||||
MachineFunction &MF, const MachineLoopInfo &MLI,
|
||||
const WebAssemblyInstrInfo &TII) {
|
||||
// Backward branches are loop backedges, and we place the LOOP markers
|
||||
// separately. So only consider forward branches here.
|
||||
if (Succ.getNumber() <= MBB.getNumber())
|
||||
static int GetLoopDepth(const MachineLoop *Loop) {
|
||||
return Loop ? Loop->getLoopDepth() : 0;
|
||||
}
|
||||
|
||||
/// Insert a BLOCK marker for branches to MBB (if needed).
|
||||
static void PlaceBlockMarkers(MachineBasicBlock &MBB, MachineFunction &MF,
|
||||
const WebAssemblyInstrInfo &TII,
|
||||
MachineDominatorTree &MDT,
|
||||
const MachineLoopInfo &MLI) {
|
||||
// Place the BLOCK for forward non-fallthrough branches. Put it at the nearest
|
||||
// common dominator of all preceding predecesors so that we minimize the time
|
||||
// that it's on the stack, which reduces overall stack height.
|
||||
MachineBasicBlock *Header = nullptr;
|
||||
bool IsBranchedTo = false;
|
||||
int MBBNumber = MBB.getNumber();
|
||||
for (MachineBasicBlock *Pred : MBB.predecessors())
|
||||
if (Pred->getNumber() < MBBNumber) {
|
||||
Header = Header ? MDT.findNearestCommonDominator(Header, Pred) : Pred;
|
||||
if (!Pred->isLayoutSuccessor(&MBB) ||
|
||||
!(Pred->empty() || !Pred->back().isBarrier()))
|
||||
IsBranchedTo = true;
|
||||
}
|
||||
if (!Header)
|
||||
return;
|
||||
if (!IsBranchedTo)
|
||||
return;
|
||||
|
||||
// Place the BLOCK for a forward branch. For simplicity, we just insert
|
||||
// blocks immediately inside loop boundaries.
|
||||
MachineLoop *Loop = MLI.getLoopFor(&Succ);
|
||||
MachineBasicBlock &Header = *(Loop ? Loop->getHeader() : &MF.front());
|
||||
MachineBasicBlock::iterator InsertPos = Header.begin(), End = Header.end();
|
||||
if (InsertPos != End) {
|
||||
if (InsertPos->getOpcode() == WebAssembly::LOOP)
|
||||
++InsertPos;
|
||||
int SuccNumber = Succ.getNumber();
|
||||
// Position the BLOCK in nesting order.
|
||||
for (; InsertPos != End && InsertPos->getOpcode() == WebAssembly::BLOCK;
|
||||
++InsertPos) {
|
||||
int N = InsertPos->getOperand(0).getMBB()->getNumber();
|
||||
if (N < SuccNumber)
|
||||
break;
|
||||
// If there's already a BLOCK for Succ, we don't need another.
|
||||
if (N == SuccNumber)
|
||||
return;
|
||||
}
|
||||
MachineBasicBlock::iterator InsertPos;
|
||||
MachineLoop *HeaderLoop = MLI.getLoopFor(Header);
|
||||
int MBBLoopDepth = GetLoopDepth(MLI.getLoopFor(&MBB));
|
||||
int HeaderLoopDepth = GetLoopDepth(HeaderLoop);
|
||||
if (HeaderLoopDepth > MBBLoopDepth) {
|
||||
// The nearest common dominating point is more deeply nested. Insert the
|
||||
// BLOCK just above the LOOP.
|
||||
for (int i = 0; i < HeaderLoopDepth - 1 - MBBLoopDepth; ++i)
|
||||
HeaderLoop = HeaderLoop->getParentLoop();
|
||||
Header = HeaderLoop->getHeader();
|
||||
InsertPos = Header->begin();
|
||||
// Don't insert a BLOCK if we can reuse a loop exit label though.
|
||||
if (InsertPos != Header->end() &&
|
||||
InsertPos->getOpcode() == WebAssembly::LOOP &&
|
||||
InsertPos->getOperand(0).getMBB() == &MBB)
|
||||
return;
|
||||
} else {
|
||||
// Insert the BLOCK as late in the block as we can, but before any existing
|
||||
// BLOCKs.
|
||||
InsertPos = Header->getFirstTerminator();
|
||||
while (InsertPos != Header->begin() &&
|
||||
std::prev(InsertPos)->getOpcode() == WebAssembly::BLOCK)
|
||||
--InsertPos;
|
||||
}
|
||||
|
||||
BuildMI(Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK))
|
||||
.addMBB(&Succ);
|
||||
BuildMI(*Header, InsertPos, DebugLoc(), TII.get(WebAssembly::BLOCK))
|
||||
.addMBB(&MBB);
|
||||
}
|
||||
|
||||
/// Insert LOOP and BLOCK markers at appropriate places.
|
||||
static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
|
||||
const WebAssemblyInstrInfo &TII) {
|
||||
const WebAssemblyInstrInfo &TII,
|
||||
MachineDominatorTree &MDT) {
|
||||
for (auto &MBB : MF) {
|
||||
// Place the LOOP for loops.
|
||||
// Place the LOOP for MBB if MBB is the header of a loop.
|
||||
if (MachineLoop *Loop = MLI.getLoopFor(&MBB))
|
||||
if (Loop->getHeader() == &MBB) {
|
||||
// The operand of a LOOP is the first block after the loop. If the loop
|
||||
@ -260,20 +296,29 @@ static void PlaceMarkers(MachineFunction &MF, const MachineLoopInfo &MLI,
|
||||
.addMBB(&*Iter);
|
||||
}
|
||||
|
||||
// Check for forward branches and switches that need BLOCKS placed.
|
||||
for (auto &Term : MBB.terminators())
|
||||
for (auto &MO : Term.operands())
|
||||
if (MO.isMBB())
|
||||
PlaceBlockMarkers(MBB, *MO.getMBB(), MF, MLI, TII);
|
||||
// Place the BLOCK for MBB if MBB is branched to from above.
|
||||
PlaceBlockMarkers(MBB, MF, TII, MDT, MLI);
|
||||
}
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
static bool
|
||||
IsOnStack(const SmallVectorImpl<std::pair<MachineBasicBlock *, bool>> &Stack,
|
||||
const MachineBasicBlock *MBB) {
|
||||
for (const auto &Pair : Stack)
|
||||
if (Pair.first == MBB)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
|
||||
bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
|
||||
DEBUG(dbgs() << "********** CFG Stackifying **********\n"
|
||||
"********** Function: "
|
||||
<< MF.getName() << '\n');
|
||||
|
||||
const auto &MLI = getAnalysis<MachineLoopInfo>();
|
||||
auto &MDT = getAnalysis<MachineDominatorTree>();
|
||||
const auto &TII = *MF.getSubtarget<WebAssemblySubtarget>().getInstrInfo();
|
||||
|
||||
// RPO sorting needs all loops to be single-entry.
|
||||
@ -283,7 +328,41 @@ bool WebAssemblyCFGStackify::runOnMachineFunction(MachineFunction &MF) {
|
||||
SortBlocks(MF, MLI);
|
||||
|
||||
// Place the BLOCK and LOOP markers to indicate the beginnings of scopes.
|
||||
PlaceMarkers(MF, MLI, TII);
|
||||
PlaceMarkers(MF, MLI, TII, MDT);
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Verify that block and loop beginnings and endings are in FIFO order, and
|
||||
// that all references to blocks are to blocks on the stack at the point of
|
||||
// the reference.
|
||||
SmallVector<std::pair<MachineBasicBlock *, bool>, 0> Stack;
|
||||
for (auto &MBB : MF) {
|
||||
while (!Stack.empty() && Stack.back().first == &MBB)
|
||||
if (Stack.back().second) {
|
||||
assert(Stack.size() >= 2);
|
||||
Stack.pop_back();
|
||||
Stack.pop_back();
|
||||
} else {
|
||||
assert(Stack.size() >= 1);
|
||||
Stack.pop_back();
|
||||
}
|
||||
for (auto &MI : MBB)
|
||||
switch (MI.getOpcode()) {
|
||||
case WebAssembly::LOOP:
|
||||
Stack.push_back(std::make_pair(&MBB, false));
|
||||
Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), true));
|
||||
break;
|
||||
case WebAssembly::BLOCK:
|
||||
Stack.push_back(std::make_pair(MI.getOperand(0).getMBB(), false));
|
||||
break;
|
||||
default:
|
||||
for (const MachineOperand &MO : MI.explicit_operands())
|
||||
if (MO.isMBB())
|
||||
assert(IsOnStack(Stack, MO.getMBB()));
|
||||
break;
|
||||
}
|
||||
}
|
||||
assert(Stack.empty());
|
||||
#endif
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -99,9 +99,9 @@ for.end:
|
||||
|
||||
; CHECK-LABEL: doublediamond:
|
||||
; CHECK: block BB3_5{{$}}
|
||||
; CHECK: block BB3_4{{$}}
|
||||
; CHECK: block BB3_2{{$}}
|
||||
; CHECK: br_if $pop{{[0-9]+}}, BB3_2{{$}}
|
||||
; CHECK: block BB3_4{{$}}
|
||||
; CHECK: br_if $pop{{[0-9]+}}, BB3_4{{$}}
|
||||
; CHECK: br BB3_5{{$}}
|
||||
; CHECK: BB3_4:
|
||||
@ -216,8 +216,8 @@ exit:
|
||||
|
||||
; CHECK-LABEL: doubletriangle:
|
||||
; CHECK: block BB9_4{{$}}
|
||||
; CHECK: block BB9_3{{$}}
|
||||
; CHECK: br_if $pop{{[0-9]+}}, BB9_4{{$}}
|
||||
; CHECK: block BB9_3{{$}}
|
||||
; CHECK: br_if $pop{{[0-9]+}}, BB9_3{{$}}
|
||||
; CHECK: BB9_3:
|
||||
; CHECK: BB9_4:
|
||||
@ -271,6 +271,45 @@ exit:
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
; CHECK-LABEL: doublediamond_in_a_loop:
|
||||
; CHECK: BB11_1:
|
||||
; CHECK: loop BB11_7{{$}}
|
||||
; CHECK: block BB11_6{{$}}
|
||||
; CHECK: block BB11_3{{$}}
|
||||
; CHECK: br_if $pop{{.*}}, BB11_3{{$}}
|
||||
; CHECK: br BB11_6{{$}}
|
||||
; CHECK: BB11_3:
|
||||
; CHECK: block BB11_5{{$}}
|
||||
; CHECK: br_if $pop{{.*}}, BB11_5{{$}}
|
||||
; CHECK: br BB11_6{{$}}
|
||||
; CHECK: BB11_5:
|
||||
; CHECK: BB11_6:
|
||||
; CHECK: br BB11_1{{$}}
|
||||
define i32 @doublediamond_in_a_loop(i32 %a, i32 %b, i32* %p) {
|
||||
entry:
|
||||
br label %header
|
||||
header:
|
||||
%c = icmp eq i32 %a, 0
|
||||
%d = icmp eq i32 %b, 0
|
||||
store volatile i32 0, i32* %p
|
||||
br i1 %c, label %true, label %false
|
||||
true:
|
||||
store volatile i32 1, i32* %p
|
||||
br label %exit
|
||||
false:
|
||||
store volatile i32 2, i32* %p
|
||||
br i1 %d, label %ft, label %ff
|
||||
ft:
|
||||
store volatile i32 3, i32* %p
|
||||
br label %exit
|
||||
ff:
|
||||
store volatile i32 4, i32* %p
|
||||
br label %exit
|
||||
exit:
|
||||
store volatile i32 5, i32* %p
|
||||
br label %header
|
||||
}
|
||||
|
||||
; Test that nested loops are handled.
|
||||
|
||||
declare void @bar()
|
||||
|
Loading…
x
Reference in New Issue
Block a user