Recommit r346483: [CallSiteSplitting] Only record conditions up to the IDom(call site).

The underlying problem causing the expensive-check failure was fixed in
rL346769.

llvm-svn: 346843
This commit is contained in:
Florian Hahn 2018-11-14 10:04:30 +00:00
parent 91e29d8455
commit 586ba5447b
4 changed files with 64 additions and 25 deletions

View File

@ -149,14 +149,14 @@ static void recordCondition(CallSite CS, BasicBlock *From, BasicBlock *To,
/// Record ICmp conditions relevant to any argument in CS following Pred's
/// single predecessors. If there are conflicting conditions along a path, like
/// x == 1 and x == 0, the first condition will be used.
/// x == 1 and x == 0, the first condition will be used. We stop once we reach
/// an edge to StopAt.
static void recordConditions(CallSite CS, BasicBlock *Pred,
ConditionsTy &Conditions) {
recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
ConditionsTy &Conditions, BasicBlock *StopAt) {
BasicBlock *From = Pred;
BasicBlock *To = Pred;
SmallPtrSet<BasicBlock *, 4> Visited;
while (!Visited.count(From->getSinglePredecessor()) &&
while (To != StopAt && !Visited.count(From->getSinglePredecessor()) &&
(From = From->getSinglePredecessor())) {
recordCondition(CS, From, To, Conditions);
Visited.insert(From);
@ -453,15 +453,27 @@ static PredsWithCondsTy shouldSplitOnPHIPredicatedArgument(CallSite CS) {
// Checks if any of the arguments in CS are predicated in a predecessor and
// returns a list of predecessors with the conditions that hold on their edges
// to CS.
static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallSite CS) {
static PredsWithCondsTy shouldSplitOnPredicatedArgument(CallSite CS,
DomTreeUpdater &DTU) {
auto Preds = getTwoPredecessors(CS.getInstruction()->getParent());
if (Preds[0] == Preds[1])
return {};
// We can stop recording conditions once we reached the immediate dominator
// for the block containing the call site. Conditions in predecessors of the
// that node will be the same for all paths to the call site and splitting
// is not beneficial.
assert(DTU.hasDomTree() && "We need a DTU with a valid DT!");
auto *CSDTNode = DTU.getDomTree().getNode(CS.getInstruction()->getParent());
BasicBlock *StopAt = CSDTNode ? CSDTNode->getIDom()->getBlock() : nullptr;
SmallVector<std::pair<BasicBlock *, ConditionsTy>, 2> PredsCS;
for (auto *Pred : make_range(Preds.rbegin(), Preds.rend())) {
ConditionsTy Conditions;
recordConditions(CS, Pred, Conditions);
// Record condition on edge BB(CS) <- Pred
recordCondition(CS, Pred, CS.getInstruction()->getParent(), Conditions);
// Record conditions following Pred's single predecessors.
recordConditions(CS, Pred, Conditions, StopAt);
PredsCS.push_back({Pred, Conditions});
}
@ -479,7 +491,7 @@ static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
if (!CS.arg_size() || !canSplitCallSite(CS, TTI))
return false;
auto PredsWithConds = shouldSplitOnPredicatedArgument(CS);
auto PredsWithConds = shouldSplitOnPredicatedArgument(CS, DTU);
if (PredsWithConds.empty())
PredsWithConds = shouldSplitOnPHIPredicatedArgument(CS);
if (PredsWithConds.empty())
@ -490,9 +502,9 @@ static bool tryToSplitCallSite(CallSite CS, TargetTransformInfo &TTI,
}
static bool doCallSiteSplitting(Function &F, TargetLibraryInfo &TLI,
TargetTransformInfo &TTI, DominatorTree *DT) {
TargetTransformInfo &TTI, DominatorTree &DT) {
DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
bool Changed = false;
for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE;) {
BasicBlock &BB = *BI++;
@ -537,6 +549,7 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<DominatorTreeWrapperPass>();
AU.addPreserved<DominatorTreeWrapperPass>();
FunctionPass::getAnalysisUsage(AU);
}
@ -547,9 +560,8 @@ struct CallSiteSplittingLegacyPass : public FunctionPass {
auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto *DTWP = getAnalysisIfAvailable<DominatorTreeWrapperPass>();
return doCallSiteSplitting(F, TLI, TTI,
DTWP ? &DTWP->getDomTree() : nullptr);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
return doCallSiteSplitting(F, TLI, TTI, DT);
}
};
} // namespace
@ -559,6 +571,7 @@ INITIALIZE_PASS_BEGIN(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(CallSiteSplittingLegacyPass, "callsite-splitting",
"Call-site splitting", false, false)
FunctionPass *llvm::createCallSiteSplittingPass() {
@ -569,7 +582,7 @@ PreservedAnalyses CallSiteSplittingPass::run(Function &F,
FunctionAnalysisManager &AM) {
auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto *DT = AM.getCachedResult<DominatorTreeAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
if (!doCallSiteSplitting(F, TLI, TTI, DT))
return PreservedAnalyses::all();

View File

@ -38,13 +38,13 @@
; CHECK-O2-NEXT: Running pass: CallSiteSplittingPass on foo
; CHECK-O2-NEXT: Running analysis: TargetLibraryAnalysis on foo
; CHECK-O2-NEXT: Running analysis: TargetIRAnalysis on foo
; CHECK-O2-NEXT: Running analysis: DominatorTreeAnalysis on foo
; CHECK-O2-NEXT: Finished llvm::Function pass manager run.
; CHECK-O2-NEXT: PGOIndirectCallPromotion
; CHECK-O2-NEXT: Running analysis: ProfileSummaryAnalysis
; CHECK-O2-NEXT: Running analysis: OptimizationRemarkEmitterAnalysis
; CHECK-O2-NEXT: Running pass: IPSCCPPass
; CHECK-O2-DAG: Running analysis: AssumptionAnalysis on foo
; CHECK-O2-DAG: Running analysis: DominatorTreeAnalysis on foo
; CHECK-O2-NEXT: Running analysis: AssumptionAnalysis on foo
; CHECK-O2-NEXT: Running pass: CalledValuePropagationPass
; CHECK-O-NEXT: Running pass: ModuleToPostOrderCGSCCPassAdaptor<{{.*}}PostOrderFunctionAttrsPass>
; CHECK-O-NEXT: Running analysis: InnerAnalysisManagerProxy<{{.*}}SCC

View File

@ -28,6 +28,7 @@
; CHECK-NEXT: Force set function attributes
; CHECK-NEXT: Infer set function attributes
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction
; CHECK-NEXT: Call-site splitting
; CHECK-NEXT: Interprocedural Sparse Conditional Constant Propagation
; CHECK-NEXT: Unnamed pass: implement Pass::getPassName()

View File

@ -37,9 +37,9 @@ End:
;CHECK-LABEL: @test_eq_eq_eq
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10)
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 1, i32 %p)
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
@ -123,14 +123,14 @@ End:
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_ne_eq_ne(i32* %a, i32 %v, i32 %p) {
Header:
%tobool1 = icmp ne i32* %a, null
br i1 %tobool1, label %Header2, label %End
br i1 %tobool1, label %Header2, label %TBB
Header2:
%tobool2 = icmp eq i32 %p, 10
@ -178,14 +178,14 @@ End:
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
;CHECK-LABEL: TBB.split:
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 %p)
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_ne_ne_ne_constrain_same_pointer_arg(i32* %a, i32 %v, i32 %p, i32* %a2, i32* %a3) {
Header:
%tobool1 = icmp ne i32* %a, null
br i1 %tobool1, label %Header2, label %End
br i1 %tobool1, label %Header2, label %TBB
Header2:
%tobool2 = icmp ne i32* %a, %a2
@ -235,14 +235,14 @@ End:
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* nonnull %a, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* nonnull %a, i32 1, i32 %p)
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 1, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_eq_eq_eq_untaken(i32* %a, i32 %v, i32 %p) {
Header:
%tobool1 = icmp eq i32* %a, null
br i1 %tobool1, label %End, label %Header2
br i1 %tobool1, label %TBB, label %Header2
Header2:
%tobool2 = icmp eq i32 %p, 10
@ -290,14 +290,14 @@ End:
;CHECK-LABEL: Header2.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* null, i32 %v, i32 10)
;CHECK-LABEL: TBB.split:
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* null, i32 %v, i32 %p)
;CHECK: %[[CALL2:.*]] = call i32 @callee(i32* %a, i32 %v, i32 %p)
;CHECK-LABEL: Tail
;CHECK: %[[MERGED:.*]] = phi i32 [ %[[CALL1]], %Header2.split ], [ %[[CALL2]], %TBB.split ]
;CHECK: ret i32 %[[MERGED]]
define i32 @test_ne_eq_ne_untaken(i32* %a, i32 %v, i32 %p) {
Header:
%tobool1 = icmp ne i32* %a, null
br i1 %tobool1, label %End, label %Header2
br i1 %tobool1, label %TBB, label %Header2
Header2:
%tobool2 = icmp eq i32 %p, 10
@ -489,6 +489,31 @@ End:
ret i32 %v
}
;CHECK-LABEL: @test_cond_no_effect
;CHECK-NOT: Header.split:
;CHECK-NOT: TBB.split:
;CHECK-LABEL: Tail:
;CHECK: %r = call i32 @callee(i32* %a, i32 %v, i32 0)
;CHECK: ret i32 %r
define i32 @test_cond_no_effect(i32* %a, i32 %v) {
Entry:
%tobool1 = icmp eq i32* %a, null
br i1 %tobool1, label %Header, label %End
Header:
br i1 undef, label %Tail, label %TBB
TBB:
br i1 undef, label %Tail, label %End
Tail:
%r = call i32 @callee(i32* %a, i32 %v, i32 0)
ret i32 %r
End:
ret i32 %v
}
;CHECK-LABEL: @test_unreachable
;CHECK-LABEL: Header.split:
;CHECK: %[[CALL1:.*]] = call i32 @callee(i32* %a, i32 %v, i32 10)