From 24947af01331cce712f4b9a549a958f66da50820 Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Thu, 29 May 2003 15:11:31 +0000 Subject: [PATCH] * Separate all of the grunt work of inlining out into the Utils library. * Make the function inliner _significantly_ smarter. :) llvm-svn: 6396 --- lib/Transforms/IPO/InlineSimple.cpp | 340 +++++++++++------------- lib/Transforms/Utils/InlineFunction.cpp | 164 ++++++++++++ 2 files changed, 313 insertions(+), 191 deletions(-) create mode 100644 lib/Transforms/Utils/InlineFunction.cpp diff --git a/lib/Transforms/IPO/InlineSimple.cpp b/lib/Transforms/IPO/InlineSimple.cpp index a45c4546f82..0151a2d2ac6 100644 --- a/lib/Transforms/IPO/InlineSimple.cpp +++ b/lib/Transforms/IPO/InlineSimple.cpp @@ -1,15 +1,6 @@ //===- FunctionInlining.cpp - Code to perform function inlining -----------===// // -// This file implements inlining of functions. -// -// Specifically, this: -// * Exports functionality to inline any function call -// * Inlines functions that consist of a single basic block -// * Is able to inline ANY function call -// . Has a smart heuristic for when to inline a function -// -// FIXME: This pass should transform alloca instructions in the called function -// into malloc/free pairs! Or perhaps it should refuse to inline them! +// This file implements bottom-up inlining of functions into callers. // //===----------------------------------------------------------------------===// @@ -17,194 +8,161 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Module.h" #include "llvm/Pass.h" -#include "llvm/iTerminators.h" -#include "llvm/iPHINode.h" #include "llvm/iOther.h" -#include "llvm/DerivedTypes.h" +#include "llvm/iMemory.h" #include "Support/Statistic.h" -#include - -static Statistic<> NumInlined("inline", "Number of functions inlined"); - -// InlineFunction - This function forcibly inlines the called function into the -// basic block of the caller. This returns false if it is not possible to -// inline this call. 
The program is still in a well defined state if this -// occurs though. -// -// Note that this only does one level of inlining. For example, if the -// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now -// exists in the instruction stream. Similiarly this will inline a recursive -// function by one level. -// -bool InlineFunction(CallInst *CI) { - assert(isa(CI) && "InlineFunction only works on CallInst nodes"); - assert(CI->getParent() && "Instruction not embedded in basic block!"); - assert(CI->getParent()->getParent() && "Instruction not in function!"); - - const Function *CalledFunc = CI->getCalledFunction(); - if (CalledFunc == 0 || // Can't inline external function or indirect - CalledFunc->isExternal() || // call, or call to a vararg function! - CalledFunc->getFunctionType()->isVarArg()) return false; - - //std::cerr << "Inlining " << CalledFunc->getName() << " into " - // << CurrentMeth->getName() << "\n"; - - BasicBlock *OrigBB = CI->getParent(); - - // Call splitBasicBlock - The original basic block now ends at the instruction - // immediately before the call. The original basic block now ends with an - // unconditional branch to NewBB, and NewBB starts with the call instruction. - // - BasicBlock *NewBB = OrigBB->splitBasicBlock(CI); - NewBB->setName("InlinedFunctionReturnNode"); - - // Remove (unlink) the CallInst from the start of the new basic block. - NewBB->getInstList().remove(CI); - - // If we have a return value generated by this call, convert it into a PHI - // node that gets values from each of the old RET instructions in the original - // function. - // - PHINode *PHI = 0; - if (!CI->use_empty()) { - // The PHI node should go at the front of the new basic block to merge all - // possible incoming values. - // - PHI = new PHINode(CalledFunc->getReturnType(), CI->getName(), - NewBB->begin()); - - // Anything that used the result of the function call should now use the PHI - // node as their operand. 
- // - CI->replaceAllUsesWith(PHI); - } - - // Get a pointer to the last basic block in the function, which will have the - // new function inlined after it. - // - Function::iterator LastBlock = &OrigBB->getParent()->back(); - - // Calculate the vector of arguments to pass into the function cloner... - std::map ValueMap; - assert((unsigned)std::distance(CalledFunc->abegin(), CalledFunc->aend()) == - CI->getNumOperands()-1 && "No varargs calls can be inlined yet!"); - - unsigned i = 1; - for (Function::const_aiterator I = CalledFunc->abegin(), E=CalledFunc->aend(); - I != E; ++I, ++i) - ValueMap[I] = CI->getOperand(i); - - // Since we are now done with the CallInst, we can delete it. - delete CI; - - // Make a vector to capture the return instructions in the cloned function... - std::vector Returns; - - // Populate the value map with all of the globals in the program. - Module &M = *OrigBB->getParent()->getParent(); - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - ValueMap[I] = I; - for (Module::giterator I = M.gbegin(), E = M.gend(); I != E; ++I) - ValueMap[I] = I; - - // Do all of the hard part of cloning the callee into the caller... - CloneFunctionInto(OrigBB->getParent(), CalledFunc, ValueMap, Returns, ".i"); - - // Loop over all of the return instructions, turning them into unconditional - // branches to the merge point now... - for (unsigned i = 0, e = Returns.size(); i != e; ++i) { - ReturnInst *RI = Returns[i]; - BasicBlock *BB = RI->getParent(); - - // Add a branch to the merge point where the PHI node would live... - new BranchInst(NewBB, RI); - - if (PHI) { // The PHI node should include this value! 
- assert(RI->getReturnValue() && "Ret should have value!"); - assert(RI->getReturnValue()->getType() == PHI->getType() && - "Ret value not consistent in function!"); - PHI->addIncoming(RI->getReturnValue(), BB); - } - - // Delete the return instruction now - BB->getInstList().erase(RI); - } - - // Check to see if the PHI node only has one argument. This is a common - // case resulting from there only being a single return instruction in the - // function call. Because this is so common, eliminate the PHI node. - // - if (PHI && PHI->getNumIncomingValues() == 1) { - PHI->replaceAllUsesWith(PHI->getIncomingValue(0)); - PHI->getParent()->getInstList().erase(PHI); - } - - // Change the branch that used to go to NewBB to branch to the first basic - // block of the inlined function. - // - TerminatorInst *Br = OrigBB->getTerminator(); - assert(Br && Br->getOpcode() == Instruction::Br && - "splitBasicBlock broken!"); - Br->setOperand(0, ++LastBlock); - return true; -} - -static inline bool ShouldInlineFunction(const CallInst *CI, const Function *F) { - assert(CI->getParent() && CI->getParent()->getParent() && - "Call not embedded into a function!"); - - // Don't inline a recursive call. - if (CI->getParent()->getParent() == F) return false; - - // Don't inline something too big. This is a really crappy heuristic - if (F->size() > 3) return false; - - // Don't inline into something too big. This is a **really** crappy heuristic - if (CI->getParent()->getParent()->size() > 10) return false; - - // Go ahead and try just about anything else. 
- return true; -} - - -static inline bool DoFunctionInlining(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ++I) { - if (CallInst *CI = dyn_cast(I)) { - // Check to see if we should inline this function - Function *F = CI->getCalledFunction(); - if (F && ShouldInlineFunction(CI, F)) { - return InlineFunction(CI); - } - } - } - return false; -} - -// doFunctionInlining - Use a heuristic based approach to inline functions that -// seem to look good. -// -static bool doFunctionInlining(Function &F) { - bool Changed = false; - - // Loop through now and inline instructions a basic block at a time... - for (Function::iterator I = F.begin(); I != F.end(); ) - if (DoFunctionInlining(I)) { - ++NumInlined; - Changed = true; - } else { - ++I; - } - - return Changed; -} +#include namespace { - struct FunctionInlining : public FunctionPass { - virtual bool runOnFunction(Function &F) { - return doFunctionInlining(F); + Statistic<> NumInlined("inline", "Number of functions inlined"); + + struct FunctionInlining : public Pass { + virtual bool run(Module &M) { + bool Changed = false; + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + Changed |= doInlining(I); + ProcessedFunctions.clear(); + return Changed; } + + private: + std::set ProcessedFunctions; // Prevent infinite recursion + bool doInlining(Function *F); }; RegisterOpt X("inline", "Function Integration/Inlining"); } Pass *createFunctionInliningPass() { return new FunctionInlining(); } + + +// ShouldInlineFunction - The heuristic used to determine if we should inline +// the function call or not. +// +static inline bool ShouldInlineFunction(const CallInst *CI) { + assert(CI->getParent() && CI->getParent()->getParent() && + "Call not embedded into a function!"); + + const Function *Callee = CI->getCalledFunction(); + if (Callee == 0 || Callee->isExternal()) + return false; // Cannot inline an indirect call... or external function. + + // Don't inline a recursive call. 
+ const Function *Caller = CI->getParent()->getParent(); + if (Caller == Callee) return false; + + // InlineQuality - This value measures how good of an inline candidate this + // call site is to inline. The initial value determines how aggressive the + // inliner is. If this value is negative after the final computation, + // inlining is not performed. + // + int InlineQuality = 200; // FIXME: This is VERY conservative + + // If there is only one call of the function, and it has internal linkage, + // make it almost guaranteed to be inlined. + // + if (Callee->use_size() == 1 && Callee->hasInternalLinkage()) + InlineQuality += 30000; + + // Add to the inline quality for properties that make the call valuable to + // inline. This includes factors that indicate that the result of inlining + // the function will be optimizable. Currently this just looks at arguments + // passed into the function. + // + for (User::const_op_iterator I = CI->op_begin()+1, E = CI->op_end(); + I != E; ++I){ + // Each argument passed in has a cost at both the caller and the callee + // sides. This favors functions that take many arguments over functions + // that take few arguments. + InlineQuality += 20; + + // If this is a function being passed in, it is very likely that we will be + // able to turn an indirect function call into a direct function call. + if (isa(I)) + InlineQuality += 100; + + // If a constant, global variable or alloca is passed in, inlining this + // function is likely to allow significant future optimization possibilities + // (constant propagation, scalar promotion, and scalarization), so encourage + // the inlining of the function. + // + else if (isa(I) || isa(I) || isa(I)) + InlineQuality += 60; + } + + // Now that we have considered all of the factors that make the call site more + // likely to be inlined, look at factors that make us not want to inline it. + // As soon as the inline quality gets negative, bail out. + + // Look at the size of the callee. 
Each basic block counts as 20 units, and + // each instruction counts as 10. + for (Function::const_iterator BB = Callee->begin(), E = Callee->end(); + BB != E; ++BB) { + InlineQuality -= BB->size()*10 + 20; + if (InlineQuality < 0) return false; + } + + // Don't inline into something too big, which would make it bigger. Here, we + // count each basic block as a single unit. + for (Function::const_iterator BB = Caller->begin(), E = Caller->end(); + BB != E; ++BB) { + --InlineQuality; + if (InlineQuality < 0) return false; + } + + // If we get here, this call site is high enough "quality" to inline. + DEBUG(std::cerr << "Inlining in '" << Caller->getName() + << "', quality = " << InlineQuality << ": " << *CI); + return true; +} + + +// doInlining - Use a heuristic based approach to inline functions that seem to +// look good. +// +bool FunctionInlining::doInlining(Function *F) { + // If we have already processed this function (ie, it is recursive) don't + // revisit. + std::set::iterator PFI = ProcessedFunctions.lower_bound(F); + if (PFI != ProcessedFunctions.end() && *PFI == F) return false; + + // Insert the function in the set so it doesn't get revisited. + ProcessedFunctions.insert(PFI, F); + + bool Changed = false; + for (Function::iterator BB = F->begin(); BB != F->end(); ++BB) + for (BasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + bool ShouldInc = true; + // Found a call instruction? FIXME: This should also handle INVOKEs + if (CallInst *CI = dyn_cast(I)) { + if (Function *Callee = CI->getCalledFunction()) + doInlining(Callee); // Inline in callees before callers! + + // Decide whether we should inline this function... + if (ShouldInlineFunction(CI)) { + // Save an iterator to the instruction before the call if it exists, + // otherwise get an iterator at the end of the block... because the + // call will be destroyed. + // + BasicBlock::iterator SI; + if (I != BB->begin()) { + SI = I; --SI; // Instruction before the call... 
+ } else { + SI = BB->end(); + } + + // Attempt to inline the function... + if (InlineFunction(CI)) { + ++NumInlined; + Changed = true; + // Move to instruction before the call... + I = (SI == BB->end()) ? BB->begin() : SI; + ShouldInc = false; // Don't increment iterator until next time + } + } + } + if (ShouldInc) ++I; + } + + return Changed; +} + diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp new file mode 100644 index 00000000000..e88153e1f8b --- /dev/null +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -0,0 +1,164 @@ +//===- InlineFunction.cpp - Code to perform function inlining -------------===// +// +// This file implements inlining of a function into a call site, resolving +// parameters and the return value as appropriate. +// +// FIXME: This pass should transform alloca instructions in the called function +// into malloc/free pairs! Or perhaps it should refuse to inline them! +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Module.h" +#include "llvm/iTerminators.h" +#include "llvm/iPHINode.h" +#include "llvm/iMemory.h" +#include "llvm/iOther.h" +#include "llvm/DerivedTypes.h" + +// InlineFunction - This function inlines the called function into the basic +// block of the caller. This returns false if it is not possible to inline this +// call. The program is still in a well defined state if this occurs though. +// +// Note that this only does one level of inlining. For example, if the +// instruction 'call B' is inlined, and 'B' calls 'C', then the call to 'C' now +// exists in the instruction stream. Similarly, this will inline a recursive +// function by one level. 
+// +bool InlineFunction(CallInst *CI) { + assert(isa(CI) && "InlineFunction only works on CallInst nodes"); + assert(CI->getParent() && "Instruction not embedded in basic block!"); + assert(CI->getParent()->getParent() && "Instruction not in function!"); + + const Function *CalledFunc = CI->getCalledFunction(); + if (CalledFunc == 0 || // Can't inline external function or indirect + CalledFunc->isExternal() || // call, or call to a vararg function! + CalledFunc->getFunctionType()->isVarArg()) return false; + + BasicBlock *OrigBB = CI->getParent(); + Function *Caller = OrigBB->getParent(); + + // Call splitBasicBlock - The original basic block now ends at the instruction + // immediately before the call. The original basic block now ends with an + // unconditional branch to NewBB, and NewBB starts with the call instruction. + // + BasicBlock *NewBB = OrigBB->splitBasicBlock(CI); + NewBB->setName(OrigBB->getName()+".split"); + + // Remove (unlink) the CallInst from the start of the new basic block. + NewBB->getInstList().remove(CI); + + // If we have a return value generated by this call, convert it into a PHI + // node that gets values from each of the old RET instructions in the original + // function. + // + PHINode *PHI = 0; + if (!CI->use_empty()) { + // The PHI node should go at the front of the new basic block to merge all + // possible incoming values. + // + PHI = new PHINode(CalledFunc->getReturnType(), CI->getName(), + NewBB->begin()); + + // Anything that used the result of the function call should now use the PHI + // node as their operand. + // + CI->replaceAllUsesWith(PHI); + } + + // Get an iterator to the last basic block in the function, which will have + // the new function inlined after it. + // + Function::iterator LastBlock = &Caller->back(); + + // Calculate the vector of arguments to pass into the function cloner... 
+ std::map ValueMap; + assert((unsigned)std::distance(CalledFunc->abegin(), CalledFunc->aend()) == + CI->getNumOperands()-1 && "No varargs calls can be inlined yet!"); + + unsigned i = 1; + for (Function::const_aiterator I = CalledFunc->abegin(), E=CalledFunc->aend(); + I != E; ++I, ++i) + ValueMap[I] = CI->getOperand(i); + + // Since we are now done with the CallInst, we can delete it. + delete CI; + + // Make a vector to capture the return instructions in the cloned function... + std::vector Returns; + + // Populate the value map with all of the globals in the program. + Module &M = *Caller->getParent(); + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + ValueMap[I] = I; + for (Module::giterator I = M.gbegin(), E = M.gend(); I != E; ++I) + ValueMap[I] = I; + + // Do all of the hard part of cloning the callee into the caller... + CloneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i"); + + // Loop over all of the return instructions, turning them into unconditional + // branches to the merge point now... + for (unsigned i = 0, e = Returns.size(); i != e; ++i) { + ReturnInst *RI = Returns[i]; + BasicBlock *BB = RI->getParent(); + + // Add a branch to the merge point where the PHI node would live... + new BranchInst(NewBB, RI); + + if (PHI) { // The PHI node should include this value! + assert(RI->getReturnValue() && "Ret should have value!"); + assert(RI->getReturnValue()->getType() == PHI->getType() && + "Ret value not consistent in function!"); + PHI->addIncoming(RI->getReturnValue(), BB); + } + + // Delete the return instruction now + BB->getInstList().erase(RI); + } + + // Check to see if the PHI node only has one argument. This is a common + // case resulting from there only being a single return instruction in the + // function call. Because this is so common, eliminate the PHI node. 
+ // + if (PHI && PHI->getNumIncomingValues() == 1) { + PHI->replaceAllUsesWith(PHI->getIncomingValue(0)); + PHI->getParent()->getInstList().erase(PHI); + } + + // Change the branch that used to go to NewBB to branch to the first basic + // block of the inlined function. + // + TerminatorInst *Br = OrigBB->getTerminator(); + assert(Br && Br->getOpcode() == Instruction::Br && + "splitBasicBlock broken!"); + Br->setOperand(0, ++LastBlock); + + // If there are any alloca instructions in the block that used to be the entry + // block for the callee, move them to the entry block of the caller. First + // calculate which instruction they should be inserted before. We insert the + // instructions at the end of the current alloca list. + // + BasicBlock::iterator InsertPoint = Caller->begin()->begin(); + while (isa(InsertPoint)) ++InsertPoint; + + for (BasicBlock::iterator I = LastBlock->begin(), E = LastBlock->end(); + I != E; ) + if (AllocaInst *AI = dyn_cast(I)) { + ++I; // Move to the next instruction + LastBlock->getInstList().remove(AI); + Caller->front().getInstList().insert(InsertPoint, AI); + + } else { + ++I; + } + + // Now that the function is correct, make it a little bit nicer. In + // particular, move the basic blocks inserted from the end of the function + // into the space made by splitting the source basic block. + // + Caller->getBasicBlockList().splice(NewBB, Caller->getBasicBlockList(), + LastBlock, Caller->end()); + + return true; +}