diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 33c7d0172a2..4d4af727576 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -290,6 +290,21 @@ bool Inliner::shouldInline(CallSite CS) {
   return true;
 }
 
+/// InlineHistoryIncludes - Return true if the specified inline history ID
+/// indicates an inline history that includes the specified function.
+static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
+            const SmallVectorImpl<std::pair<Function*, int> > &InlineHistory) {
+  while (InlineHistoryID != -1) {
+    assert(unsigned(InlineHistoryID) < InlineHistory.size() &&
+           "Invalid inline history ID");
+    if (InlineHistory[InlineHistoryID].first == F)
+      return true;
+    InlineHistoryID = InlineHistory[InlineHistoryID].second;
+  }
+  return false;
+}
+
+
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
   const TargetData *TD = getAnalysisIfAvailable<TargetData>();
@@ -305,7 +320,13 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   // Scan through and identify all call sites ahead of time so that we only
   // inline call sites in the original functions, not call sites that result
   // from inlining other functions.
-  SmallVector<CallSite, 16> CallSites;
+  SmallVector<std::pair<CallSite, int>, 16> CallSites;
+
+  // When inlining a callee produces new call sites, we want to keep track of
+  // the fact that they were inlined from the callee.  This allows us to avoid
+  // infinite inlining in some obscure cases.  To represent this, we use an
+  // index into the InlineHistory vector.
+  SmallVector<std::pair<Function*, int>, 8> InlineHistory;
 
   for (CallGraphSCC::iterator I = SCC.begin(), E = SCC.end(); I != E; ++I) {
     Function *F = (*I)->getFunction();
@@ -325,7 +346,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
         if (CS.getCalledFunction() && CS.getCalledFunction()->isDeclaration())
           continue;
         
-        CallSites.push_back(CS);
+        CallSites.push_back(std::make_pair(CS, -1));
       }
   }
 
@@ -339,7 +360,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   // current SCC to the end of the list.
   unsigned FirstCallInSCC = CallSites.size();
   for (unsigned i = 0; i < FirstCallInSCC; ++i)
-    if (Function *F = CallSites[i].getCalledFunction())
+    if (Function *F = CallSites[i].first.getCalledFunction())
       if (SCCFunctions.count(F))
         std::swap(CallSites[i--], CallSites[--FirstCallInSCC]);
 
@@ -356,7 +377,7 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
     // Iterate over the outer loop because inlining functions can cause indirect
     // calls to become direct calls.
     for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
-      CallSite CS = CallSites[CSi];
+      CallSite CS = CallSites[CSi].first;
       
       Function *Caller = CS.getCaller();
       Function *Callee = CS.getCalledFunction();
@@ -378,6 +399,17 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
       // We can only inline direct calls to non-declarations.
       if (Callee == 0 || Callee->isDeclaration()) continue;
       
+      // If this call site was obtained by inlining another function, verify
+      // that the inline path for the function did not include the callee
+      // itself.  If so, we'd be recursively inlining the same function,
+      // which would provide the same callsites, which would cause us to
+      // infinitely inline.
+      int InlineHistoryID = CallSites[CSi].second;
+      if (InlineHistoryID != -1 &&
+          InlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory))
+        continue;
+      
+      
       // If the policy determines that we should inline this function,
       // try to do so.
       if (!shouldInline(CS))
@@ -387,13 +419,20 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
       if (!InlineCallIfPossible(CS, InlineInfo, InlinedArrayAllocas))
         continue;
       ++NumInlined;
-      
+
       // If inlining this function devirtualized any call sites, throw them
       // onto our worklist to process.  They are useful inline candidates.
-      for (unsigned i = 0, e = InlineInfo.DevirtualizedCalls.size();
-           i != e; ++i) {
-        Value *Ptr = InlineInfo.DevirtualizedCalls[i];
-        CallSites.push_back(CallSite(Ptr));
+      if (!InlineInfo.DevirtualizedCalls.empty()) {
+        // Create a new inline history entry for this, so that we remember
+        // that these new callsites came about due to inlining Callee.
+        int NewHistoryID = InlineHistory.size();
+        InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID));
+
+        for (unsigned i = 0, e = InlineInfo.DevirtualizedCalls.size();
+             i != e; ++i) {
+          Value *Ptr = InlineInfo.DevirtualizedCalls[i];
+          CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+        }
       }
       
       // Update the cached cost info with the inlined call.
diff --git a/test/Transforms/Inline/noinline-recursive-fn.ll b/test/Transforms/Inline/noinline-recursive-fn.ll
index dcae0243300..1d5ebbbf0fa 100644
--- a/test/Transforms/Inline/noinline-recursive-fn.ll
+++ b/test/Transforms/Inline/noinline-recursive-fn.ll
@@ -2,7 +2,7 @@
 ; This effectively is just peeling off the first iteration of a loop, and the
 ; inliner heuristics are not set up for this.
 
-; RUN: opt -inline %s -S | grep "call void @foo(i32 42)"
+; RUN: opt -inline %s -S | FileCheck %s
 
 target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
 target triple = "x86_64-apple-darwin10.3"
@@ -11,7 +11,6 @@ target triple = "x86_64-apple-darwin10.3"
 
 define internal void @foo(i32 %x) nounwind ssp {
 entry:
-  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
   %0 = icmp slt i32 %x, 0                         ; <i1> [#uses=1]
   br i1 %0, label %return, label %bb
 
@@ -25,8 +24,50 @@ return:         ; preds = %entry
   ret void
 }
 
+
+;; CHECK: @bonk
+;; CHECK: call void @foo(i32 42)
 define void @bonk() nounwind ssp {
 entry:
   call void @foo(i32 42) nounwind ssp
   ret void
 }
+
+
+
+;; Here is an indirect case that should not be infinitely inlined.
+
+define internal void @f1(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp {
+entry:
+  %0 = bitcast i8* %Bar to void (i32, i8*, i8*)*
+  %1 = sub nsw i32 %x, 1
+  call void %0(i32 %1, i8* %Foo, i8* %Bar) nounwind
+  volatile store i32 42, i32* @g, align 4
+  ret void
+}
+
+define internal void @f2(i32 %x, i8* %Foo, i8* %Bar) nounwind ssp {
+entry:
+  %0 = icmp slt i32 %x, 0                         ; <i1> [#uses=1]
+  br i1 %0, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  %1 = bitcast i8* %Foo to void (i32, i8*, i8*)*  ; <void (i32, i8*, i8*)*> [#uses=1]
+  call void %1(i32 %x, i8* %Foo, i8* %Bar) nounwind
+  volatile store i32 13, i32* @g, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+
+; CHECK: @top_level
+; CHECK: call void @f2(i32 122
+; Here we inline one instance of the cycle, but we don't want to completely
+; unroll it.
+define void @top_level() nounwind ssp {
+entry:
+  call void @f2(i32 123, i8* bitcast (void (i32, i8*, i8*)* @f1 to i8*), i8* bitcast (void (i32, i8*, i8*)* @f2 to i8*)) nounwind ssp
+  ret void
+}
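
The key data structure in the patch is the InlineHistory vector: each entry is a
(callee, parent-entry-index) pair, so the history visible from any one call site
forms a singly linked chain through the vector, terminated by -1, and
InlineHistoryIncludes simply walks that chain. The following standalone C++
sketch shows the same idea in isolation, simulating the @f1/@f2 cycle from the
test case; it uses std::string in place of Function* and a simplified worklist,
so the names (History, inlineHistoryIncludes, Worklist) are illustrative
assumptions, not part of the patch.

// Minimal sketch of the inline-history chain, under the assumptions above.
#include <cassert>
#include <iostream>
#include <string>
#include <utility>
#include <vector>

// Each entry is (inlined callee, index of parent entry); -1 ends the chain.
typedef std::vector<std::pair<std::string, int> > History;

// Walk the chain rooted at ID, as InlineHistoryIncludes does in the patch.
static bool inlineHistoryIncludes(const std::string &F, int ID,
                                  const History &InlineHistory) {
  while (ID != -1) {
    assert(unsigned(ID) < InlineHistory.size() && "Invalid inline history ID");
    if (InlineHistory[ID].first == F)
      return true;
    ID = InlineHistory[ID].second;
  }
  return false;
}

int main() {
  History InlineHistory;
  // Worklist of (callee, inline history ID), mirroring CallSites above.
  std::vector<std::pair<std::string, int> > Worklist;
  Worklist.push_back(std::make_pair(std::string("f2"), -1)); // @top_level calls @f2.

  for (unsigned i = 0; i != Worklist.size(); ++i) {
    std::string Callee = Worklist[i].first;
    int ID = Worklist[i].second;

    // The check the patch adds: refuse to inline a callee that is already
    // on the inline path of this call site.
    if (ID != -1 && inlineHistoryIncludes(Callee, ID, InlineHistory)) {
      std::cout << "skipping " << Callee << "\n";
      continue;
    }

    std::cout << "inlining " << Callee << "\n";

    // Simulate the devirtualization in the test: inlining @f1 exposes a
    // direct call to @f2 and vice versa, so record a new history entry and
    // queue the exposed call site with it.
    int NewHistoryID = InlineHistory.size();
    InlineHistory.push_back(std::make_pair(Callee, ID));
    std::string Exposed = (Callee == "f1") ? "f2" : "f1";
    Worklist.push_back(std::make_pair(Exposed, NewHistoryID));
  }
  return 0;
}

The sketch prints "inlining f2", "inlining f1", then "skipping f2" and
terminates: one level of the cycle is peeled instead of unrolling it forever,
which is what the CHECK lines above express by expecting a residual
"call void @f2(i32 122" in @top_level.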