diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index 1f7625d3049..9590df966e6 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,6 +19,7 @@
 #include "llvm/IntrinsicInst.h"
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/InlineCost.h"
+#include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Target/TargetData.h"
 #include "llvm/Transforms/IPO/InlinerPass.h"
 #include "llvm/Transforms/Utils/Cloning.h"
@@ -327,6 +328,37 @@ static bool InlineHistoryIncludes(Function *F, int InlineHistoryID,
   return false;
 }
 
+/// \brief Simplify arguments going into a particular callsite.
+///
+/// This is important to do each time we add a callsite due to inlining so that
+/// constants and other entities which feed into inline cost estimation are
+/// properly recognized when analyzing the new callsite. Consider:
+///   void outer(int x) {
+///     if (x < 42)
+///       return inner(42 - x);
+///     ...
+///   }
+///   void inner(int x) {
+///     ...
+///   }
+///
+/// The inliner gives calls to 'outer' with a constant argument a bonus because
+/// it will delete one side of a branch. But the resulting call to 'inner'
+/// will, after inlining, also have a constant operand. We need to do just
+/// enough constant folding to expose this for callsite arguments. The rest
+/// will be taken care of after the inliner finishes running.
+static void simplifyCallSiteArguments(const TargetData *TD, CallSite CS) {
+  // FIXME: It would be nice to avoid this smallvector if RAUW doesn't
+  // invalidate operand iterators in any cases.
+  SmallVector<std::pair<Instruction *, Value *>, 4> SimplifiedArgs;
+  for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end();
+       I != E; ++I)
+    if (Instruction *Inst = dyn_cast<Instruction>(*I))
+      if (Value *SimpleArg = SimplifyInstruction(Inst, TD))
+        SimplifiedArgs.push_back(std::make_pair(Inst, SimpleArg));
+  for (unsigned Idx = 0, Size = SimplifiedArgs.size(); Idx != Size; ++Idx)
+    SimplifiedArgs[Idx].first->replaceAllUsesWith(SimplifiedArgs[Idx].second);
+}
 
 bool Inliner::runOnSCC(CallGraphSCC &SCC) {
   CallGraph &CG = getAnalysis<CallGraph>();
@@ -455,7 +487,9 @@ bool Inliner::runOnSCC(CallGraphSCC &SCC) {
 
         for (unsigned i = 0, e = InlineInfo.InlinedCalls.size();
              i != e; ++i) {
           Value *Ptr = InlineInfo.InlinedCalls[i];
-          CallSites.push_back(std::make_pair(CallSite(Ptr), NewHistoryID));
+          CallSite NewCS = Ptr;
+          simplifyCallSiteArguments(TD, NewCS);
+          CallSites.push_back(std::make_pair(NewCS, NewHistoryID));
         }
       }
diff --git a/test/Transforms/Inline/inline_constprop.ll b/test/Transforms/Inline/inline_constprop.ll
index 18edf15ab45..cc7aaac2b3a 100644
--- a/test/Transforms/Inline/inline_constprop.ll
+++ b/test/Transforms/Inline/inline_constprop.ll
@@ -12,3 +12,78 @@ define i32 @caller1() {
   %X = call i32 @callee1( i32 10, i32 3 )
   ret i32 %X
 }
+
+define i32 @caller2() {
+; CHECK: @caller2
+; CHECK-NOT: call void @callee2
+; CHECK: ret
+
+; We contrive to make this hard for *just* the inline pass to do in order to
+; simulate what can actually happen with large, complex functions getting
+; inlined.
+  %a = add i32 42, 0
+  %b = add i32 48, 0
+
+  %x = call i32 @callee21(i32 %a, i32 %b)
+  ret i32 %x
+}
+
+define i32 @callee21(i32 %x, i32 %y) {
+  %sub = sub i32 %y, %x
+  %result = call i32 @callee22(i32 %sub)
+  ret i32 %result
+}
+
+declare i8* @getptr()
+
+define i32 @callee22(i32 %x) {
+  %icmp = icmp ugt i32 %x, 42
+  br i1 %icmp, label %bb.true, label %bb.false
+bb.true:
+  ; This block mustn't be counted in the inline cost.
+  %ptr = call i8* @getptr()
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+  load volatile i8* %ptr
+
+  ret i32 %x
+bb.false:
+  ret i32 %x
+}