- CodeGenPrepare does not split loop back edges, but until now it only recognized the back edges of single-block loops. It now does a DFS walk to find loop back edges.
- Use SplitBlockPredecessors to factor out common predecessors of the critical edge destination. This is disabled for now due to some regressions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@61248 91177308-0d34-0410-b5e6-96231b3b80d8
Commit: ab63152871
Parent: a33649e98c
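The back-edge scan is the part of this change that is enabled unconditionally; the SplitBlockPredecessors factoring sits behind the hidden -split-critical-paths-tweak option added below. As a rough standalone sketch of the same back-edge idea, here is an iterative DFS that reports every edge whose target is still on the DFS stack. The graph type, names, and standard-library containers are illustrative assumptions only; the pass itself walks BasicBlocks through succ_iterator and collects the edges into the SmallSet shown in the diff.

// backedges.cpp - illustrative sketch only, not the pass's code.
// Build with: c++ backedges.cpp -o backedges
#include <cstddef>
#include <iostream>
#include <set>
#include <utility>
#include <vector>

// Successor lists per node; node 0 is treated as the entry block.
using Graph = std::vector<std::vector<std::size_t>>;

// Iterative DFS from Entry. An edge whose target is still on the DFS stack
// closes a cycle, i.e. it is a loop back edge.
std::set<std::pair<std::size_t, std::size_t>>
findBackEdges(const Graph &G, std::size_t Entry) {
  std::set<std::pair<std::size_t, std::size_t>> BackEdges;
  std::vector<bool> Visited(G.size(), false), InStack(G.size(), false);
  // Each entry is (node, index of the next successor to look at).
  std::vector<std::pair<std::size_t, std::size_t>> VisitStack;

  Visited[Entry] = InStack[Entry] = true;
  VisitStack.push_back({Entry, 0});

  while (!VisitStack.empty()) {
    std::size_t Node = VisitStack.back().first;
    bool FoundNew = false;
    while (VisitStack.back().second < G[Node].size()) {
      std::size_t Succ = G[Node][VisitStack.back().second++];
      if (!Visited[Succ]) {
        // Unvisited successor: go down one level.
        Visited[Succ] = InStack[Succ] = true;
        VisitStack.push_back({Succ, 0});
        FoundNew = true;
        break;
      }
      if (InStack[Succ]) // successor is an ancestor on the stack: back edge
        BackEdges.insert({Node, Succ});
    }
    if (!FoundNew) {
      // All successors handled: go up one level.
      InStack[VisitStack.back().first] = false;
      VisitStack.pop_back();
    }
  }
  return BackEdges;
}

int main() {
  // 0 -> 1 -> 2 -> 1 is a two-block loop; 2 -> 3 is the loop exit.
  Graph G = {{1}, {2}, {1, 3}, {}};
  for (const auto &E : findBackEdges(G, 0))
    std::cout << E.first << " -> " << E.second << "\n"; // prints "2 -> 1"
  return 0;
}

On the small graph in main (a loop between nodes 1 and 2, entered from 0 and exiting to 3), the only edge reported is 2 -> 1, which is exactly the kind of edge SplitEdgeNicely now refuses to split.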
@@ -30,6 +30,7 @@
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/Support/CallSite.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/GetElementPtrTypeIterator.h"
@@ -37,11 +38,18 @@
 using namespace llvm;
 using namespace llvm::PatternMatch;
 
+static cl::opt<bool> FactorCommonPreds("split-critical-paths-tweak",
+                                       cl::init(false), cl::Hidden);
+
 namespace {
   class VISIBILITY_HIDDEN CodeGenPrepare : public FunctionPass {
     /// TLI - Keep a pointer of a TargetLowering to consult for determining
     /// transformation profitability.
     const TargetLowering *TLI;
+
+    /// BackEdges - Keep a set of all the loop back edges.
+    ///
+    SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> BackEdges;
   public:
     static char ID; // Pass identification, replacement for typeid
     explicit CodeGenPrepare(const TargetLowering *tli = 0)
@@ -58,6 +66,7 @@ namespace {
     bool OptimizeInlineAsmInst(Instruction *I, CallSite CS,
                                DenseMap<Value*,Value*> &SunkAddrs);
     bool OptimizeExtUses(Instruction *I);
+    void findLoopBackEdges(Function &F);
   };
 }
 
@@ -69,10 +78,55 @@ FunctionPass *llvm::createCodeGenPreparePass(const TargetLowering *TLI) {
   return new CodeGenPrepare(TLI);
 }
 
+/// findLoopBackEdges - Do a DFS walk to find loop back edges.
+///
+void CodeGenPrepare::findLoopBackEdges(Function &F) {
+  SmallPtrSet<BasicBlock*, 8> Visited;
+  SmallVector<std::pair<BasicBlock*, succ_iterator>, 8> VisitStack;
+  SmallPtrSet<BasicBlock*, 8> InStack;
+
+  BasicBlock *BB = &F.getEntryBlock();
+  if (succ_begin(BB) == succ_end(BB))
+    return;
+  Visited.insert(BB);
+  VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+  InStack.insert(BB);
+  do {
+    std::pair<BasicBlock*, succ_iterator> &Top = VisitStack.back();
+    BasicBlock *ParentBB = Top.first;
+    succ_iterator &I = Top.second;
+
+    bool FoundNew = false;
+    while (I != succ_end(ParentBB)) {
+      BB = *I++;
+      if (Visited.insert(BB)) {
+        FoundNew = true;
+        break;
+      }
+      // Successor is in VisitStack, it's a back edge.
+      if (InStack.count(BB))
+        BackEdges.insert(std::make_pair(ParentBB, BB));
+    }
+
+    if (FoundNew) {
+      // Go down one level if there is a unvisited successor.
+      InStack.insert(BB);
+      VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+    } else {
+      // Go up one level.
+      std::pair<BasicBlock*, succ_iterator> &Pop = VisitStack.back();
+      InStack.erase(Pop.first);
+      VisitStack.pop_back();
+    }
+  } while (!VisitStack.empty());
+}
+
+
 bool CodeGenPrepare::runOnFunction(Function &F) {
   bool EverMadeChange = false;
+
+  findLoopBackEdges(F);
+
   // First pass, eliminate blocks that contain only PHI nodes and an
   // unconditional branch.
   EverMadeChange |= EliminateMostlyEmptyBlocks(F);
@@ -262,7 +316,9 @@ void CodeGenPrepare::EliminateMostlyEmptyBlock(BasicBlock *BB) {
 /// phi nodes (otherwise critical edges are ok). If there is already another
 /// predecessor of the succ that is empty (and thus has no phi nodes), use it
 /// instead of introducing a new block.
-static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
+static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum,
+                     SmallSet<std::pair<BasicBlock*,BasicBlock*>, 8> &BackEdges,
+                            Pass *P) {
   BasicBlock *TIBB = TI->getParent();
   BasicBlock *Dest = TI->getSuccessor(SuccNum);
   assert(isa<PHINode>(Dest->begin()) &&
@@ -271,9 +327,10 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
   // As a hack, never split backedges of loops. Even though the copy for any
   // PHIs inserted on the backedge would be dead for exits from the loop, we
   // assume that the cost of *splitting* the backedge would be too high.
-  if (Dest == TIBB)
+  if (BackEdges.count(std::make_pair(TIBB, Dest)))
     return;
 
+  if (!FactorCommonPreds) {
   /// TIPHIValues - This array is lazily computed to determine the values of
   /// PHIs in Dest that TI would provide.
   SmallVector<Value*, 32> TIPHIValues;
@@ -318,8 +375,42 @@ static void SplitEdgeNicely(TerminatorInst *TI, unsigned SuccNum, Pass *P) {
   }
 
   SplitCriticalEdge(TI, SuccNum, P, true);
+  return;
+  }
+
+  PHINode *PN;
+  SmallVector<Value*, 8> TIPHIValues;
+  for (BasicBlock::iterator I = Dest->begin();
+       (PN = dyn_cast<PHINode>(I)); ++I)
+    TIPHIValues.push_back(PN->getIncomingValueForBlock(TIBB));
+
+  SmallVector<BasicBlock*, 8> IdenticalPreds;
+  for (pred_iterator PI = pred_begin(Dest), E = pred_end(Dest); PI != E; ++PI) {
+    BasicBlock *Pred = *PI;
+    if (BackEdges.count(std::make_pair(Pred, Dest)))
+      continue;
+    if (PI == TIBB)
+      IdenticalPreds.push_back(Pred);
+    else {
+      bool Identical = true;
+      unsigned PHINo = 0;
+      for (BasicBlock::iterator I = Dest->begin();
+           (PN = dyn_cast<PHINode>(I)); ++I, ++PHINo)
+        if (TIPHIValues[PHINo] != PN->getIncomingValueForBlock(Pred)) {
+          Identical = false;
+          break;
+        }
+      if (Identical)
+        IdenticalPreds.push_back(Pred);
+    }
+  }
+
+  assert(!IdenticalPreds.empty());
+  SplitBlockPredecessors(Dest, &IdenticalPreds[0], IdenticalPreds.size(),
+                         ".critedge", P);
 }
 
+
 /// OptimizeNoopCopyExpression - If the specified cast instruction is a noop
 /// copy (e.g. it's casting from one pointer type to another, int->uint, or
 /// int->sbyte on PPC), sink it into user blocks to reduce the number of virtual
@@ -1350,16 +1441,15 @@ bool CodeGenPrepare::OptimizeExtUses(Instruction *I) {
 bool CodeGenPrepare::OptimizeBlock(BasicBlock &BB) {
   bool MadeChange = false;
 
-  // Split all critical edges where the dest block has a PHI and where the phi
-  // has shared immediate operands.
+  // Split all critical edges where the dest block has a PHI.
   TerminatorInst *BBTI = BB.getTerminator();
   if (BBTI->getNumSuccessors() > 1) {
-    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i)
-      if (isa<PHINode>(BBTI->getSuccessor(i)->begin()) &&
-          isCriticalEdge(BBTI, i, true))
-        SplitEdgeNicely(BBTI, i, this);
+    for (unsigned i = 0, e = BBTI->getNumSuccessors(); i != e; ++i) {
+      BasicBlock *SuccBB = BBTI->getSuccessor(i);
+      if (isa<PHINode>(SuccBB->begin()) && isCriticalEdge(BBTI, i, true))
+        SplitEdgeNicely(BBTI, i, BackEdges, this);
+    }
   }
 
-
   // Keep track of non-local addresses that have been sunk into this block.
   // This allows us to avoid inserting duplicate code for blocks with multiple
test/CodeGen/X86/critical-edge-split.ll (new file, 50 lines)
@@ -0,0 +1,50 @@
+; RUN: llvm-as < %s | llc -mtriple=i386-apple-darwin -stats -info-output-file - | grep asm-printer | grep 31
+
+%CC = type { %Register }
+%II = type { %"struct.XX::II::$_74" }
+%JITFunction = type %YYValue* (%CC*, %YYValue**)
+%YYValue = type { i32 (...)** }
+%Register = type { %"struct.XX::ByteCodeFeatures" }
+%"struct.XX::ByteCodeFeatures" = type { i32 }
+%"struct.XX::II::$_74" = type { i8* }
+@llvm.used = appending global [1 x i8*] [ i8* bitcast (%JITFunction* @loop to i8*) ], section "llvm.metadata"  ; <[1 x i8*]*> [#uses=0]
+
+define %YYValue* @loop(%CC*, %YYValue**) nounwind {
+; <label>:2
+  %3 = getelementptr %CC* %0, i32 -9  ; <%CC*> [#uses=1]
+  %4 = bitcast %CC* %3 to %YYValue**  ; <%YYValue**> [#uses=2]
+  %5 = load %YYValue** %4  ; <%YYValue*> [#uses=3]
+  %unique_1.i = ptrtoint %YYValue* %5 to i1  ; <i1> [#uses=1]
+  br i1 %unique_1.i, label %loop, label %11
+
+loop:  ; preds = %6, %2
+  %.1 = phi %YYValue* [ inttoptr (i32 1 to %YYValue*), %2 ], [ %intAddValue, %6 ]  ; <%YYValue*> [#uses=3]
+  %immediateCmp = icmp slt %YYValue* %.1, %5  ; <i1> [#uses=1]
+  br i1 %immediateCmp, label %6, label %8
+
+; <label>:6  ; preds = %loop
+  %lhsInt = ptrtoint %YYValue* %.1 to i32  ; <i32> [#uses=1]
+  %7 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %lhsInt, i32 2)  ; <{ i32, i1 }> [#uses=2]
+  %intAdd = extractvalue { i32, i1 } %7, 0  ; <i32> [#uses=1]
+  %intAddValue = inttoptr i32 %intAdd to %YYValue*  ; <%YYValue*> [#uses=1]
+  %intAddOverflow = extractvalue { i32, i1 } %7, 1  ; <i1> [#uses=1]
+  br i1 %intAddOverflow, label %.loopexit, label %loop
+
+; <label>:8  ; preds = %loop
+  ret %YYValue* inttoptr (i32 10 to %YYValue*)
+
+.loopexit:  ; preds = %6
+  %9 = bitcast %CC* %0 to %YYValue**  ; <%YYValue**> [#uses=1]
+  store %YYValue* %.1, %YYValue** %9
+  store %YYValue* %5, %YYValue** %4
+  %10 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431104 to %II*), %CC* %0, %YYValue** %1)  ; <%YYValue*> [#uses=1]
+  ret %YYValue* %10
+
+; <label>:11  ; preds = %2
+  %12 = call fastcc %YYValue* @foobar(%II* inttoptr (i32 3431080 to %II*), %CC* %0, %YYValue** %1)  ; <%YYValue*> [#uses=1]
+  ret %YYValue* %12
+}
+
+declare fastcc %YYValue* @foobar(%II*, %CC*, %YYValue**) nounwind
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind
@@ -1,4 +1,4 @@
-; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 2
+; RUN: llvm-as < %s | llc -march=x86 | grep xor | count 1
 
 %struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
 %struct.ImgT = type { i8, i8*, i8*, %struct.FILE*, i32, i32, i32, i32, i8*, double*, float*, float*, float*, i32*, double, double, i32*, double*, i32*, i32* }
|
Loading…
Reference in New Issue
Block a user