diff --git a/llvm/include/llvm/Analysis/AliasAnalysis.h b/llvm/include/llvm/Analysis/AliasAnalysis.h index 953e15e358f1..68956bcf388a 100644 --- a/llvm/include/llvm/Analysis/AliasAnalysis.h +++ b/llvm/include/llvm/Analysis/AliasAnalysis.h @@ -985,10 +985,6 @@ ImmutablePass *createExternalAAWrapperPass( /// getAnalysisUsage. AAResults createLegacyPMAAResults(Pass &P, Function &F, BasicAAResult &BAR); -/// A helper for the legacy pass manager to populate \p AU to add uses to make -/// sure the analyses required by \p createLegacyPMAAResults are available. -void getAAResultsAnalysisUsage(AnalysisUsage &AU); - } // end namespace llvm #endif // LLVM_ANALYSIS_ALIASANALYSIS_H diff --git a/llvm/include/llvm/Transforms/IPO/Inliner.h b/llvm/include/llvm/Transforms/IPO/Inliner.h index 1e154eb8f5da..f1d1e86f3cab 100644 --- a/llvm/include/llvm/Transforms/IPO/Inliner.h +++ b/llvm/include/llvm/Transforms/IPO/Inliner.h @@ -23,62 +23,6 @@ class AssumptionCacheTracker; class CallGraph; class ProfileSummaryInfo; -/// This class contains all of the helper code which is used to perform the -/// inlining operations that do not depend on the policy. It contains the core -/// bottom-up inlining infrastructure that specific inliner passes use. -struct LegacyInlinerBase : public CallGraphSCCPass { - explicit LegacyInlinerBase(char &ID); - explicit LegacyInlinerBase(char &ID, bool InsertLifetime); - - /// For this class, we declare that we require and preserve the call graph. - /// If the derived class implements this method, it should always explicitly - /// call the implementation here. - void getAnalysisUsage(AnalysisUsage &Info) const override; - - using llvm::Pass::doInitialization; - - bool doInitialization(CallGraph &CG) override; - - /// Main run interface method, this implements the interface required by the - /// Pass class. - bool runOnSCC(CallGraphSCC &SCC) override; - - using llvm::Pass::doFinalization; - - /// Remove now-dead linkonce functions at the end of processing to avoid - /// breaking the SCC traversal. - bool doFinalization(CallGraph &CG) override; - - /// This method must be implemented by the subclass to determine the cost of - /// inlining the specified call site. If the cost returned is greater than - /// the current inline threshold, the call site is not inlined. - virtual InlineCost getInlineCost(CallBase &CB) = 0; - - /// Remove dead functions. - /// - /// This also includes a hack in the form of the 'AlwaysInlineOnly' flag - /// which restricts it to deleting functions with an 'AlwaysInline' - /// attribute. This is useful for the InlineAlways pass that only wants to - /// deal with that subset of the functions. - bool removeDeadFunctions(CallGraph &CG, bool AlwaysInlineOnly = false); - - /// This function performs the main work of the pass. The default of - /// Inlinter::runOnSCC() calls skipSCC() before calling this method, but - /// derived classes which cannot be skipped can override that method and call - /// this function unconditionally. - bool inlineCalls(CallGraphSCC &SCC); - -private: - // Insert @llvm.lifetime intrinsics. - bool InsertLifetime = true; - -protected: - AssumptionCacheTracker *ACT; - ProfileSummaryInfo *PSI; - std::function GetTLI; - ImportedFunctionsInliningStatistics ImportedFunctionsStats; -}; - /// The inliner pass for the new pass manager. /// /// This pass wires together the inlining utilities and the inline cost diff --git a/llvm/include/llvm/Transforms/Utils/Cloning.h b/llvm/include/llvm/Transforms/Utils/Cloning.h index 85343a6f66b2..22c8a3b246f3 100644 --- a/llvm/include/llvm/Transforms/Utils/Cloning.h +++ b/llvm/include/llvm/Transforms/Utils/Cloning.h @@ -203,18 +203,15 @@ void CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc, class InlineFunctionInfo { public: explicit InlineFunctionInfo( - CallGraph *cg = nullptr, function_ref GetAssumptionCache = nullptr, ProfileSummaryInfo *PSI = nullptr, BlockFrequencyInfo *CallerBFI = nullptr, BlockFrequencyInfo *CalleeBFI = nullptr, bool UpdateProfile = true) - : CG(cg), GetAssumptionCache(GetAssumptionCache), PSI(PSI), - CallerBFI(CallerBFI), CalleeBFI(CalleeBFI), - UpdateProfile(UpdateProfile) {} + : GetAssumptionCache(GetAssumptionCache), PSI(PSI), CallerBFI(CallerBFI), + CalleeBFI(CalleeBFI), UpdateProfile(UpdateProfile) {} /// If non-null, InlineFunction will update the callgraph to reflect the /// changes it makes. - CallGraph *CG; function_ref GetAssumptionCache; ProfileSummaryInfo *PSI; BlockFrequencyInfo *CallerBFI, *CalleeBFI; diff --git a/llvm/lib/Analysis/AliasAnalysis.cpp b/llvm/lib/Analysis/AliasAnalysis.cpp index 9e24f6b87bdb..cf1b11636efa 100644 --- a/llvm/lib/Analysis/AliasAnalysis.cpp +++ b/llvm/lib/Analysis/AliasAnalysis.cpp @@ -935,14 +935,3 @@ bool llvm::isNotVisibleOnUnwind(const Value *Object, return false; } - -void llvm::getAAResultsAnalysisUsage(AnalysisUsage &AU) { - // This function needs to be in sync with llvm::createLegacyPMAAResults -- if - // more alias analyses are added to llvm::createLegacyPMAAResults, they need - // to be added here also. - AU.addRequired(); - AU.addUsedIfAvailable(); - AU.addUsedIfAvailable(); - AU.addUsedIfAvailable(); - AU.addUsedIfAvailable(); -} diff --git a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 64dc8604e76a..f8821b9f7f4b 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -974,12 +974,6 @@ void AMDGPUPassConfig::addIRPasses() { // Function calls are not supported, so make sure we inline everything. addPass(createAMDGPUAlwaysInlinePass()); addPass(createAlwaysInlinerLegacyPass()); - // We need to add the barrier noop pass, otherwise adding the function - // inlining pass will cause all of the PassConfigs passes to be run - // one function at a time, which means if we have a module with two - // functions, then we will generate code for the first function - // without ever running any passes on the second. - addPass(createBarrierNoopPass()); // Handle uses of OpenCL image2d_t, image3d_t and sampler_t arguments. if (TM.getTargetTriple().getArch() == Triple::r600) diff --git a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp index 09286482edff..cc375f9badcd 100644 --- a/llvm/lib/Transforms/IPO/AlwaysInliner.cpp +++ b/llvm/lib/Transforms/IPO/AlwaysInliner.cpp @@ -28,16 +28,13 @@ using namespace llvm; #define DEBUG_TYPE "inline" -PreservedAnalyses AlwaysInlinerPass::run(Module &M, - ModuleAnalysisManager &MAM) { - // Add inline assumptions during code generation. - FunctionAnalysisManager &FAM = - MAM.getResult(M).getManager(); - auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { - return FAM.getResult(F); - }; - auto &PSI = MAM.getResult(M); +namespace { +bool AlwaysInlineImpl( + Module &M, bool InsertLifetime, ProfileSummaryInfo &PSI, + function_ref GetAssumptionCache, + function_ref GetAAR, + function_ref GetBFI) { SmallSetVector Calls; bool Changed = false; SmallVector InlinedFunctions; @@ -65,14 +62,12 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, DebugLoc DLoc = CB->getDebugLoc(); BasicBlock *Block = CB->getParent(); - InlineFunctionInfo IFI( - /*cg=*/nullptr, GetAssumptionCache, &PSI, - &FAM.getResult(*Caller), - &FAM.getResult(F)); + InlineFunctionInfo IFI(GetAssumptionCache, &PSI, + GetBFI ? &GetBFI(*Caller) : nullptr, + GetBFI ? &GetBFI(F) : nullptr); - InlineResult Res = - InlineFunction(*CB, IFI, /*MergeAttributes=*/true, - &FAM.getResult(F), InsertLifetime); + InlineResult Res = InlineFunction(*CB, IFI, /*MergeAttributes=*/true, + &GetAAR(F), InsertLifetime); if (!Res.isSuccess()) { ORE.emit([&]() { return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, @@ -127,48 +122,52 @@ PreservedAnalyses AlwaysInlinerPass::run(Module &M, } } - return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); + return Changed; } -namespace { +struct AlwaysInlinerLegacyPass : public ModulePass { + bool InsertLifetime; -/// Inliner pass which only handles "always inline" functions. -/// -/// Unlike the \c AlwaysInlinerPass, this uses the more heavyweight \c Inliner -/// base class to provide several facilities such as array alloca merging. -class AlwaysInlinerLegacyPass : public LegacyInlinerBase { - -public: - AlwaysInlinerLegacyPass() : LegacyInlinerBase(ID, /*InsertLifetime*/ true) { - initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); - } + AlwaysInlinerLegacyPass() + : AlwaysInlinerLegacyPass(/*InsertLifetime*/ true) {} AlwaysInlinerLegacyPass(bool InsertLifetime) - : LegacyInlinerBase(ID, InsertLifetime) { + : ModulePass(ID), InsertLifetime(InsertLifetime) { initializeAlwaysInlinerLegacyPassPass(*PassRegistry::getPassRegistry()); } /// Main run interface method. We override here to avoid calling skipSCC(). - bool runOnSCC(CallGraphSCC &SCC) override { return inlineCalls(SCC); } + bool runOnModule(Module &M) override { + + auto &PSI = getAnalysis().getPSI(); + auto GetAAR = [&](Function &F) -> AAResults & { + return getAnalysis(F).getAAResults(); + }; + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return getAnalysis().getAssumptionCache(F); + }; + + return AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, GetAAR, + /*GetBFI*/ nullptr); + } static char ID; // Pass identification, replacement for typeid - InlineCost getInlineCost(CallBase &CB) override; - - using llvm::Pass::doFinalization; - bool doFinalization(CallGraph &CG) override { - return removeDeadFunctions(CG, /*AlwaysInlineOnly=*/true); + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + AU.addRequired(); + AU.addRequired(); } }; -} + +} // namespace char AlwaysInlinerLegacyPass::ID = 0; INITIALIZE_PASS_BEGIN(AlwaysInlinerLegacyPass, "always-inline", "Inliner for always_inline functions", false, false) +INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) -INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass) INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) -INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_END(AlwaysInlinerLegacyPass, "always-inline", "Inliner for always_inline functions", false, false) @@ -176,46 +175,23 @@ Pass *llvm::createAlwaysInlinerLegacyPass(bool InsertLifetime) { return new AlwaysInlinerLegacyPass(InsertLifetime); } -/// Get the inline cost for the always-inliner. -/// -/// The always inliner *only* handles functions which are marked with the -/// attribute to force inlining. As such, it is dramatically simpler and avoids -/// using the powerful (but expensive) inline cost analysis. Instead it uses -/// a very simple and boring direct walk of the instructions looking for -/// impossible-to-inline constructs. -/// -/// Note, it would be possible to go to some lengths to cache the information -/// computed here, but as we only expect to do this for relatively few and -/// small functions which have the explicit attribute to force inlining, it is -/// likely not worth it in practice. -InlineCost AlwaysInlinerLegacyPass::getInlineCost(CallBase &CB) { - Function *Callee = CB.getCalledFunction(); +PreservedAnalyses AlwaysInlinerPass::run(Module &M, + ModuleAnalysisManager &MAM) { + FunctionAnalysisManager &FAM = + MAM.getResult(M).getManager(); + auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { + return FAM.getResult(F); + }; + auto GetBFI = [&](Function &F) -> BlockFrequencyInfo & { + return FAM.getResult(F); + }; + auto GetAAR = [&](Function &F) -> AAResults & { + return FAM.getResult(F); + }; + auto &PSI = MAM.getResult(M); - // Only inline direct calls to functions with always-inline attributes - // that are viable for inlining. - if (!Callee) - return InlineCost::getNever("indirect call"); + bool Changed = AlwaysInlineImpl(M, InsertLifetime, PSI, GetAssumptionCache, + GetAAR, GetBFI); - // When callee coroutine function is inlined into caller coroutine function - // before coro-split pass, - // coro-early pass can not handle this quiet well. - // So we won't inline the coroutine function if it have not been unsplited - if (Callee->isPresplitCoroutine()) - return InlineCost::getNever("unsplited coroutine call"); - - // FIXME: We shouldn't even get here for declarations. - if (Callee->isDeclaration()) - return InlineCost::getNever("no definition"); - - if (!CB.hasFnAttr(Attribute::AlwaysInline)) - return InlineCost::getNever("no alwaysinline attribute"); - - if (Callee->hasFnAttribute(Attribute::AlwaysInline) && CB.isNoInline()) - return InlineCost::getNever("noinline call site attribute"); - - auto IsViable = isInlineViable(*Callee); - if (!IsViable.isSuccess()) - return InlineCost::getNever(IsViable.getFailureReason()); - - return InlineCost::getAlways("always inliner"); + return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); } diff --git a/llvm/lib/Transforms/IPO/Inliner.cpp b/llvm/lib/Transforms/IPO/Inliner.cpp index 7340edcbd6be..01808b3d14fe 100644 --- a/llvm/lib/Transforms/IPO/Inliner.cpp +++ b/llvm/lib/Transforms/IPO/Inliner.cpp @@ -27,7 +27,6 @@ #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/CGSCCPassManager.h" -#include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/InlineAdvisor.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/LazyCallGraph.h" @@ -71,7 +70,6 @@ using namespace llvm; #define DEBUG_TYPE "inline" STATISTIC(NumInlined, "Number of functions inlined"); -STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined"); STATISTIC(NumDeleted, "Number of functions deleted because all callers found"); static cl::opt IntraSCCCostMultiplier( @@ -96,9 +94,6 @@ static cl::opt EnablePostSCCAdvisorPrinting("enable-scc-inline-advisor-printing", cl::init(false), cl::Hidden); -namespace llvm { -extern cl::opt InlinerFunctionImportStats; -} static cl::opt CGSCCInlineReplayFile( "cgscc-inline-replay", cl::init(""), cl::value_desc("filename"), @@ -151,56 +146,6 @@ static cl::opt CGSCCInlineReplayFormat( ":. (default)")), cl::desc("How cgscc inline replay file is formatted"), cl::Hidden); -LegacyInlinerBase::LegacyInlinerBase(char &ID) : CallGraphSCCPass(ID) {} - -LegacyInlinerBase::LegacyInlinerBase(char &ID, bool InsertLifetime) - : CallGraphSCCPass(ID), InsertLifetime(InsertLifetime) {} - -/// For this class, we declare that we require and preserve the call graph. -/// If the derived class implements this method, it should -/// always explicitly call the implementation here. -void LegacyInlinerBase::getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addRequired(); - AU.addRequired(); - getAAResultsAnalysisUsage(AU); - CallGraphSCCPass::getAnalysisUsage(AU); -} - -using InlinedArrayAllocasTy = DenseMap>; - -/// If it is possible to inline the specified call site, -/// do so and update the CallGraph for this operation. -/// -/// This function also does some basic book-keeping to update the IR. The -/// InlinedArrayAllocas map keeps track of any allocas that are already -/// available from other functions inlined into the caller. If we are able to -/// inline this call site we attempt to reuse already available allocas or add -/// any new allocas to the set if not possible. -static InlineResult inlineCallIfPossible( - CallBase &CB, InlineFunctionInfo &IFI, - InlinedArrayAllocasTy &InlinedArrayAllocas, int InlineHistory, - bool InsertLifetime, function_ref &AARGetter, - ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { - Function *Callee = CB.getCalledFunction(); - Function *Caller = CB.getCaller(); - - AAResults &AAR = AARGetter(*Callee); - - // Try to inline the function. Get the list of static allocas that were - // inlined. - InlineResult IR = - InlineFunction(CB, IFI, - /*MergeAttributes=*/true, &AAR, InsertLifetime); - if (!IR.isSuccess()) - return IR; - - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats.recordInline(*Caller, *Callee); - - return IR; // success -} - /// Return true if the specified inline history ID /// indicates an inline history that includes the specified function. static bool inlineHistoryIncludes( @@ -216,362 +161,6 @@ static bool inlineHistoryIncludes( return false; } -bool LegacyInlinerBase::doInitialization(CallGraph &CG) { - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats.setModuleInfo(CG.getModule()); - return false; // No changes to CallGraph. -} - -bool LegacyInlinerBase::runOnSCC(CallGraphSCC &SCC) { - if (skipSCC(SCC)) - return false; - return inlineCalls(SCC); -} - -static bool -inlineCallsImpl(CallGraphSCC &SCC, CallGraph &CG, - std::function GetAssumptionCache, - ProfileSummaryInfo *PSI, - std::function GetTLI, - bool InsertLifetime, - function_ref GetInlineCost, - function_ref AARGetter, - ImportedFunctionsInliningStatistics &ImportedFunctionsStats) { - SmallPtrSet SCCFunctions; - LLVM_DEBUG(dbgs() << "Inliner visiting SCC:"); - for (CallGraphNode *Node : SCC) { - Function *F = Node->getFunction(); - if (F) - SCCFunctions.insert(F); - LLVM_DEBUG(dbgs() << " " << (F ? F->getName() : "INDIRECTNODE")); - } - - // Scan through and identify all call sites ahead of time so that we only - // inline call sites in the original functions, not call sites that result - // from inlining other functions. - SmallVector, 16> CallSites; - - // When inlining a callee produces new call sites, we want to keep track of - // the fact that they were inlined from the callee. This allows us to avoid - // infinite inlining in some obscure cases. To represent this, we use an - // index into the InlineHistory vector. - SmallVector, 8> InlineHistory; - - for (CallGraphNode *Node : SCC) { - Function *F = Node->getFunction(); - if (!F || F->isDeclaration()) - continue; - - OptimizationRemarkEmitter ORE(F); - for (BasicBlock &BB : *F) - for (Instruction &I : BB) { - auto *CB = dyn_cast(&I); - // If this isn't a call, or it is a call to an intrinsic, it can - // never be inlined. - if (!CB || isa(I)) - continue; - - // If this is a direct call to an external function, we can never inline - // it. If it is an indirect call, inlining may resolve it to be a - // direct call, so we keep it. - if (Function *Callee = CB->getCalledFunction()) - if (Callee->isDeclaration()) { - using namespace ore; - - setInlineRemark(*CB, "unavailable definition"); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NoDefinition", &I) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", CB->getCaller()) - << " because its definition is unavailable" - << setIsVerbose(); - }); - continue; - } - - CallSites.push_back(std::make_pair(CB, -1)); - } - } - - LLVM_DEBUG(dbgs() << ": " << CallSites.size() << " call sites.\n"); - - // If there are no calls in this function, exit early. - if (CallSites.empty()) - return false; - - // Now that we have all of the call sites, move the ones to functions in the - // current SCC to the end of the list. - unsigned FirstCallInSCC = CallSites.size(); - for (unsigned I = 0; I < FirstCallInSCC; ++I) - if (Function *F = CallSites[I].first->getCalledFunction()) - if (SCCFunctions.count(F)) - std::swap(CallSites[I--], CallSites[--FirstCallInSCC]); - - InlinedArrayAllocasTy InlinedArrayAllocas; - InlineFunctionInfo InlineInfo(&CG, GetAssumptionCache, PSI); - - // Now that we have all of the call sites, loop over them and inline them if - // it looks profitable to do so. - bool Changed = false; - bool LocalChange; - do { - LocalChange = false; - // Iterate over the outer loop because inlining functions can cause indirect - // calls to become direct calls. - // CallSites may be modified inside so ranged for loop can not be used. - for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) { - auto &P = CallSites[CSi]; - CallBase &CB = *P.first; - const int InlineHistoryID = P.second; - - Function *Caller = CB.getCaller(); - Function *Callee = CB.getCalledFunction(); - - // We can only inline direct calls to non-declarations. - if (!Callee || Callee->isDeclaration()) - continue; - - bool IsTriviallyDead = isInstructionTriviallyDead(&CB, &GetTLI(*Caller)); - - if (!IsTriviallyDead) { - // If this call site was obtained by inlining another function, verify - // that the include path for the function did not include the callee - // itself. If so, we'd be recursively inlining the same function, - // which would provide the same callsites, which would cause us to - // infinitely inline. - if (InlineHistoryID != -1 && - inlineHistoryIncludes(Callee, InlineHistoryID, InlineHistory)) { - setInlineRemark(CB, "recursive"); - continue; - } - } - - // FIXME for new PM: because of the old PM we currently generate ORE and - // in turn BFI on demand. With the new PM, the ORE dependency should - // just become a regular analysis dependency. - OptimizationRemarkEmitter ORE(Caller); - - auto OIC = shouldInline(CB, GetInlineCost, ORE); - // If the policy determines that we should inline this function, - // delete the call instead. - if (!OIC) - continue; - - // If this call site is dead and it is to a readonly function, we should - // just delete the call instead of trying to inline it, regardless of - // size. This happens because IPSCCP propagates the result out of the - // call and then we're left with the dead call. - if (IsTriviallyDead) { - LLVM_DEBUG(dbgs() << " -> Deleting dead call: " << CB << "\n"); - // Update the call graph by deleting the edge from Callee to Caller. - setInlineRemark(CB, "trivially dead"); - CG[Caller]->removeCallEdgeFor(CB); - CB.eraseFromParent(); - ++NumCallsDeleted; - } else { - // Get DebugLoc to report. CB will be invalid after Inliner. - DebugLoc DLoc = CB.getDebugLoc(); - BasicBlock *Block = CB.getParent(); - - // Attempt to inline the function. - using namespace ore; - - InlineResult IR = inlineCallIfPossible( - CB, InlineInfo, InlinedArrayAllocas, InlineHistoryID, - InsertLifetime, AARGetter, ImportedFunctionsStats); - if (!IR.isSuccess()) { - setInlineRemark(CB, std::string(IR.getFailureReason()) + "; " + - inlineCostStr(*OIC)); - ORE.emit([&]() { - return OptimizationRemarkMissed(DEBUG_TYPE, "NotInlined", DLoc, - Block) - << NV("Callee", Callee) << " will not be inlined into " - << NV("Caller", Caller) << ": " - << NV("Reason", IR.getFailureReason()); - }); - continue; - } - ++NumInlined; - - emitInlinedIntoBasedOnCost(ORE, DLoc, Block, *Callee, *Caller, *OIC); - - // If inlining this function gave us any new call sites, throw them - // onto our worklist to process. They are useful inline candidates. - if (!InlineInfo.InlinedCalls.empty()) { - // Create a new inline history entry for this, so that we remember - // that these new callsites came about due to inlining Callee. - int NewHistoryID = InlineHistory.size(); - InlineHistory.push_back(std::make_pair(Callee, InlineHistoryID)); - -#ifndef NDEBUG - // Make sure no dupplicates in the inline candidates. This could - // happen when a callsite is simpilfied to reusing the return value - // of another callsite during function cloning, thus the other - // callsite will be reconsidered here. - DenseSet DbgCallSites; - for (auto &II : CallSites) - DbgCallSites.insert(II.first); -#endif - - for (Value *Ptr : InlineInfo.InlinedCalls) { -#ifndef NDEBUG - assert(DbgCallSites.count(dyn_cast(Ptr)) == 0); -#endif - CallSites.push_back( - std::make_pair(dyn_cast(Ptr), NewHistoryID)); - } - } - } - - // If we inlined or deleted the last possible call site to the function, - // delete the function body now. - assert(Callee && "Expected to be non-null due to check at start of loop"); - if (Callee->use_empty() && Callee->hasLocalLinkage() && - // TODO: Can remove if in SCC now. - !SCCFunctions.count(Callee) && - // The function may be apparently dead, but if there are indirect - // callgraph references to the node, we cannot delete it yet, this - // could invalidate the CGSCC iterator. - CG[Callee]->getNumReferences() == 0) { - LLVM_DEBUG(dbgs() << " -> Deleting dead function: " - << Callee->getName() << "\n"); - CallGraphNode *CalleeNode = CG[Callee]; - - // Remove any call graph edges from the callee to its callees. - CalleeNode->removeAllCalledFunctions(); - - // Removing the node for callee from the call graph and delete it. - delete CG.removeFunctionFromModule(CalleeNode); - ++NumDeleted; - } - - // Remove this call site from the list. If possible, use - // swap/pop_back for efficiency, but do not use it if doing so would - // move a call site to a function in this SCC before the - // 'FirstCallInSCC' barrier. - if (SCC.isSingular()) { - CallSites[CSi] = CallSites.back(); - CallSites.pop_back(); - } else { - CallSites.erase(CallSites.begin() + CSi); - } - --CSi; - - Changed = true; - LocalChange = true; - } - } while (LocalChange); - - return Changed; -} - -bool LegacyInlinerBase::inlineCalls(CallGraphSCC &SCC) { - CallGraph &CG = getAnalysis().getCallGraph(); - ACT = &getAnalysis(); - PSI = &getAnalysis().getPSI(); - GetTLI = [&](Function &F) -> const TargetLibraryInfo & { - return getAnalysis().getTLI(F); - }; - auto GetAssumptionCache = [&](Function &F) -> AssumptionCache & { - return ACT->getAssumptionCache(F); - }; - return inlineCallsImpl( - SCC, CG, GetAssumptionCache, PSI, GetTLI, InsertLifetime, - [&](CallBase &CB) { return getInlineCost(CB); }, LegacyAARGetter(*this), - ImportedFunctionsStats); -} - -/// Remove now-dead linkonce functions at the end of -/// processing to avoid breaking the SCC traversal. -bool LegacyInlinerBase::doFinalization(CallGraph &CG) { - if (InlinerFunctionImportStats != InlinerFunctionImportStatsOpts::No) - ImportedFunctionsStats.dump(InlinerFunctionImportStats == - InlinerFunctionImportStatsOpts::Verbose); - return removeDeadFunctions(CG); -} - -/// Remove dead functions that are not included in DNR (Do Not Remove) list. -bool LegacyInlinerBase::removeDeadFunctions(CallGraph &CG, - bool AlwaysInlineOnly) { - SmallVector FunctionsToRemove; - SmallVector DeadFunctionsInComdats; - - auto RemoveCGN = [&](CallGraphNode *CGN) { - // Remove any call graph edges from the function to its callees. - CGN->removeAllCalledFunctions(); - - // Remove any edges from the external node to the function's call graph - // node. These edges might have been made irrelegant due to - // optimization of the program. - CG.getExternalCallingNode()->removeAnyCallEdgeTo(CGN); - - // Removing the node for callee from the call graph and delete it. - FunctionsToRemove.push_back(CGN); - }; - - // Scan for all of the functions, looking for ones that should now be removed - // from the program. Insert the dead ones in the FunctionsToRemove set. - for (const auto &I : CG) { - CallGraphNode *CGN = I.second.get(); - Function *F = CGN->getFunction(); - if (!F || F->isDeclaration()) - continue; - - // Handle the case when this function is called and we only want to care - // about always-inline functions. This is a bit of a hack to share code - // between here and the InlineAlways pass. - if (AlwaysInlineOnly && !F->hasFnAttribute(Attribute::AlwaysInline)) - continue; - - // If the only remaining users of the function are dead constants, remove - // them. - F->removeDeadConstantUsers(); - - if (!F->isDefTriviallyDead()) - continue; - - // It is unsafe to drop a function with discardable linkage from a COMDAT - // without also dropping the other members of the COMDAT. - // The inliner doesn't visit non-function entities which are in COMDAT - // groups so it is unsafe to do so *unless* the linkage is local. - if (!F->hasLocalLinkage()) { - if (F->hasComdat()) { - DeadFunctionsInComdats.push_back(F); - continue; - } - } - - RemoveCGN(CGN); - } - if (!DeadFunctionsInComdats.empty()) { - // Filter out the functions whose comdats remain alive. - filterDeadComdatFunctions(DeadFunctionsInComdats); - // Remove the rest. - for (Function *F : DeadFunctionsInComdats) - RemoveCGN(CG[F]); - } - - if (FunctionsToRemove.empty()) - return false; - - // Now that we know which functions to delete, do so. We didn't want to do - // this inline, because that would invalidate our CallGraph::iterator - // objects. :( - // - // Note that it doesn't matter that we are iterating over a non-stable order - // here to do this, it doesn't matter which order the functions are deleted - // in. - array_pod_sort(FunctionsToRemove.begin(), FunctionsToRemove.end()); - FunctionsToRemove.erase( - std::unique(FunctionsToRemove.begin(), FunctionsToRemove.end()), - FunctionsToRemove.end()); - for (CallGraphNode *CGN : FunctionsToRemove) { - delete CG.removeFunctionFromModule(CGN); - ++NumDeleted; - } - return true; -} - InlineAdvisor & InlinerPass::getAdvisor(const ModuleAnalysisManagerCGSCCProxy::Result &MAM, FunctionAnalysisManager &FAM, Module &M) { @@ -781,7 +370,7 @@ PreservedAnalyses InlinerPass::run(LazyCallGraph::SCC &InitialC, // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - /*cg=*/nullptr, GetAssumptionCache, PSI, + GetAssumptionCache, PSI, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); diff --git a/llvm/lib/Transforms/IPO/ModuleInliner.cpp b/llvm/lib/Transforms/IPO/ModuleInliner.cpp index 956a8597c894..71ef3b4f3e20 100644 --- a/llvm/lib/Transforms/IPO/ModuleInliner.cpp +++ b/llvm/lib/Transforms/IPO/ModuleInliner.cpp @@ -213,7 +213,7 @@ PreservedAnalyses ModuleInlinerPass::run(Module &M, // Setup the data structure used to plumb customization into the // `InlineFunction` routine. InlineFunctionInfo IFI( - /*cg=*/nullptr, GetAssumptionCache, PSI, + GetAssumptionCache, PSI, &FAM.getResult(*(CB->getCaller())), &FAM.getResult(Callee)); diff --git a/llvm/lib/Transforms/IPO/PartialInlining.cpp b/llvm/lib/Transforms/IPO/PartialInlining.cpp index b040187adcfb..5a6e15a960c0 100644 --- a/llvm/lib/Transforms/IPO/PartialInlining.cpp +++ b/llvm/lib/Transforms/IPO/PartialInlining.cpp @@ -1383,7 +1383,7 @@ bool PartialInlinerImpl::tryPartialInline(FunctionCloner &Cloner) { OR << ore::NV("Callee", Cloner.OrigFunc) << " partially inlined into " << ore::NV("Caller", CB->getCaller()); - InlineFunctionInfo IFI(nullptr, GetAssumptionCache, &PSI); + InlineFunctionInfo IFI(GetAssumptionCache, &PSI); // We can only forward varargs when we outlined a single region, else we // bail on vararg functions. if (!InlineFunction(*CB, IFI, /*MergeAttributes=*/false, nullptr, true, diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp index ccccb37af42f..e745ff3a853d 100644 --- a/llvm/lib/Transforms/IPO/SampleProfile.cpp +++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp @@ -1263,7 +1263,7 @@ bool SampleProfileLoader::tryInlineCandidate( if (!Cost) return false; - InlineFunctionInfo IFI(nullptr, GetAC); + InlineFunctionInfo IFI(GetAC); IFI.UpdateProfile = false; InlineResult IR = InlineFunction(CB, IFI, /*MergeAttributes=*/true); diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index 474691643eb2..8794a6d4be9d 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -1460,81 +1460,6 @@ static void AddAlignmentAssumptions(CallBase &CB, InlineFunctionInfo &IFI) { } } -/// Once we have cloned code over from a callee into the caller, -/// update the specified callgraph to reflect the changes we made. -/// Note that it's possible that not all code was copied over, so only -/// some edges of the callgraph may remain. -static void UpdateCallGraphAfterInlining(CallBase &CB, - Function::iterator FirstNewBlock, - ValueToValueMapTy &VMap, - InlineFunctionInfo &IFI) { - CallGraph &CG = *IFI.CG; - const Function *Caller = CB.getCaller(); - const Function *Callee = CB.getCalledFunction(); - CallGraphNode *CalleeNode = CG[Callee]; - CallGraphNode *CallerNode = CG[Caller]; - - // Since we inlined some uninlined call sites in the callee into the caller, - // add edges from the caller to all of the callees of the callee. - CallGraphNode::iterator I = CalleeNode->begin(), E = CalleeNode->end(); - - // Consider the case where CalleeNode == CallerNode. - CallGraphNode::CalledFunctionsVector CallCache; - if (CalleeNode == CallerNode) { - CallCache.assign(I, E); - I = CallCache.begin(); - E = CallCache.end(); - } - - for (; I != E; ++I) { - // Skip 'refererence' call records. - if (!I->first) - continue; - - const Value *OrigCall = *I->first; - - ValueToValueMapTy::iterator VMI = VMap.find(OrigCall); - // Only copy the edge if the call was inlined! - if (VMI == VMap.end() || VMI->second == nullptr) - continue; - - // If the call was inlined, but then constant folded, there is no edge to - // add. Check for this case. - auto *NewCall = dyn_cast(VMI->second); - if (!NewCall) - continue; - - // We do not treat intrinsic calls like real function calls because we - // expect them to become inline code; do not add an edge for an intrinsic. - if (NewCall->getCalledFunction() && - NewCall->getCalledFunction()->isIntrinsic()) - continue; - - // Remember that this call site got inlined for the client of - // InlineFunction. - IFI.InlinedCalls.push_back(NewCall); - - // It's possible that inlining the callsite will cause it to go from an - // indirect to a direct call by resolving a function pointer. If this - // happens, set the callee of the new call site to a more precise - // destination. This can also happen if the call graph node of the caller - // was just unnecessarily imprecise. - if (!I->second->getFunction()) - if (Function *F = NewCall->getCalledFunction()) { - // Indirect call site resolved to direct call. - CallerNode->addCalledFunction(NewCall, CG[F]); - - continue; - } - - CallerNode->addCalledFunction(NewCall, I->second); - } - - // Update the call graph by deleting the edge from Callee to Caller. We must - // do this after the loop above in case Caller and Callee are the same. - CallerNode->removeCallEdgeFor(*cast(&CB)); -} - static void HandleByValArgumentInit(Type *ByValType, Value *Dst, Value *Src, Module *M, BasicBlock *InsertBlock, InlineFunctionInfo &IFI, @@ -2300,10 +2225,6 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, } } - // Update the callgraph if requested. - if (IFI.CG) - UpdateCallGraphAfterInlining(CB, FirstNewBlock, VMap, IFI); - // For 'nodebug' functions, the associated DISubprogram is always null. // Conservatively avoid propagating the callsite debug location to // instructions inlined from a function whose DISubprogram is not null. @@ -2709,7 +2630,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, // call graph updates weren't requested, as those provide value handle based // tracking of inlined call sites instead. Calls to intrinsics are not // collected because they are not inlineable. - if (InlinedFunctionInfo.ContainsCalls && !IFI.CG) { + if (InlinedFunctionInfo.ContainsCalls) { // Otherwise just collect the raw call sites that were inlined. for (BasicBlock &NewBB : make_range(FirstNewBlock->getIterator(), Caller->end())) diff --git a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll index 6582a6a1c441..2699899845b3 100644 --- a/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll +++ b/llvm/test/CodeGen/AMDGPU/llc-pipeline.ll @@ -36,10 +36,11 @@ ; GCN-O0-NEXT: Early propagate attributes from kernels to functions ; GCN-O0-NEXT: AMDGPU Lower Intrinsics ; GCN-O0-NEXT: AMDGPU Inline All Functions -; GCN-O0-NEXT: CallGraph Construction -; GCN-O0-NEXT: Call Graph SCC Pass Manager -; GCN-O0-NEXT: Inliner for always_inline functions -; GCN-O0-NEXT: A No-Op Barrier Pass +; GCN-O0-NEXT: Inliner for always_inline functions +; GCN-O0-NEXT: FunctionPass Manager +; GCN-O0-NEXT: Dominator Tree Construction +; GCN-O0-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O0-NEXT: Function Alias Analysis Results ; GCN-O0-NEXT: Lower OpenCL enqueued blocks ; GCN-O0-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O0-NEXT: FunctionPass Manager @@ -186,10 +187,11 @@ ; GCN-O1-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-NEXT: AMDGPU Lower Intrinsics ; GCN-O1-NEXT: AMDGPU Inline All Functions -; GCN-O1-NEXT: CallGraph Construction -; GCN-O1-NEXT: Call Graph SCC Pass Manager -; GCN-O1-NEXT: Inliner for always_inline functions -; GCN-O1-NEXT: A No-Op Barrier Pass +; GCN-O1-NEXT: Inliner for always_inline functions +; GCN-O1-NEXT: FunctionPass Manager +; GCN-O1-NEXT: Dominator Tree Construction +; GCN-O1-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-NEXT: Function Alias Analysis Results ; GCN-O1-NEXT: Lower OpenCL enqueued blocks ; GCN-O1-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-NEXT: FunctionPass Manager @@ -461,10 +463,11 @@ ; GCN-O1-OPTS-NEXT: Early propagate attributes from kernels to functions ; GCN-O1-OPTS-NEXT: AMDGPU Lower Intrinsics ; GCN-O1-OPTS-NEXT: AMDGPU Inline All Functions -; GCN-O1-OPTS-NEXT: CallGraph Construction -; GCN-O1-OPTS-NEXT: Call Graph SCC Pass Manager -; GCN-O1-OPTS-NEXT: Inliner for always_inline functions -; GCN-O1-OPTS-NEXT: A No-Op Barrier Pass +; GCN-O1-OPTS-NEXT: Inliner for always_inline functions +; GCN-O1-OPTS-NEXT: FunctionPass Manager +; GCN-O1-OPTS-NEXT: Dominator Tree Construction +; GCN-O1-OPTS-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O1-OPTS-NEXT: Function Alias Analysis Results ; GCN-O1-OPTS-NEXT: Lower OpenCL enqueued blocks ; GCN-O1-OPTS-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O1-OPTS-NEXT: FunctionPass Manager @@ -768,10 +771,11 @@ ; GCN-O2-NEXT: Early propagate attributes from kernels to functions ; GCN-O2-NEXT: AMDGPU Lower Intrinsics ; GCN-O2-NEXT: AMDGPU Inline All Functions -; GCN-O2-NEXT: CallGraph Construction -; GCN-O2-NEXT: Call Graph SCC Pass Manager -; GCN-O2-NEXT: Inliner for always_inline functions -; GCN-O2-NEXT: A No-Op Barrier Pass +; GCN-O2-NEXT: Inliner for always_inline functions +; GCN-O2-NEXT: FunctionPass Manager +; GCN-O2-NEXT: Dominator Tree Construction +; GCN-O2-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O2-NEXT: Function Alias Analysis Results ; GCN-O2-NEXT: Lower OpenCL enqueued blocks ; GCN-O2-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O2-NEXT: FunctionPass Manager @@ -1078,10 +1082,11 @@ ; GCN-O3-NEXT: Early propagate attributes from kernels to functions ; GCN-O3-NEXT: AMDGPU Lower Intrinsics ; GCN-O3-NEXT: AMDGPU Inline All Functions -; GCN-O3-NEXT: CallGraph Construction -; GCN-O3-NEXT: Call Graph SCC Pass Manager -; GCN-O3-NEXT: Inliner for always_inline functions -; GCN-O3-NEXT: A No-Op Barrier Pass +; GCN-O3-NEXT: Inliner for always_inline functions +; GCN-O3-NEXT: FunctionPass Manager +; GCN-O3-NEXT: Dominator Tree Construction +; GCN-O3-NEXT: Basic Alias Analysis (stateless AA impl) +; GCN-O3-NEXT: Function Alias Analysis Results ; GCN-O3-NEXT: Lower OpenCL enqueued blocks ; GCN-O3-NEXT: Lower uses of LDS variables from non-kernel functions ; GCN-O3-NEXT: FunctionPass Manager