[IRSim][IROutliner] Adding the extraction basics for the IROutliner.

Extracting the similar regions is the first step in the IROutliner.

Using the IRSimilarityIdentifier, we collect the SimilarityGroups and
sort them by how many instructions will be removed.  Each
IRSimilarityCandidate is used to define an OutlinableRegion.  Each
region is ordered by their occurrence in the Module and the regions that
are not compatible with previously outlined regions are discarded.

Each region is then extracted with the CodeExtractor into its own
function.

We test that correctly extract in:
test/Transforms/IROutliner/extraction.ll
test/Transforms/IROutliner/address-taken.ll
test/Transforms/IROutliner/outlining-same-globals.ll
test/Transforms/IROutliner/outlining-same-constants.ll
test/Transforms/IROutliner/outlining-different-structure.ll

Recommit of bf899e8913 fixing memory
leaks.

Reviewers: paquette, jroelofs, yroux

Differential Revision: https://reviews.llvm.org/D86975
This commit is contained in:
Andrew Litteken 2020-09-15 18:05:38 -05:00
parent c1f30e5817
commit dae34463e3
14 changed files with 985 additions and 0 deletions

View File

@ -184,6 +184,7 @@ void initializeHotColdSplittingLegacyPassPass(PassRegistry&);
void initializeHWAddressSanitizerLegacyPassPass(PassRegistry &);
void initializeIPSCCPLegacyPassPass(PassRegistry&);
void initializeIRCELegacyPassPass(PassRegistry&);
void initializeIROutlinerLegacyPassPass(PassRegistry&);
void initializeIRSimilarityIdentifierWrapperPassPass(PassRegistry&);
void initializeIRTranslatorPass(PassRegistry&);
void initializeIVUsersWrapperPassPass(PassRegistry&);

View File

@ -215,6 +215,11 @@ ModulePass *createMergeFunctionsPass();
/// function(s).
ModulePass *createHotColdSplittingPass();
//===----------------------------------------------------------------------===//
/// createIROutlinerPass - This pass finds similar code regions and factors
/// those regions out into functions.
ModulePass *createIROutlinerPass();
//===----------------------------------------------------------------------===//
/// createPartialInliningPass - This pass inlines parts of functions.
///

View File

@ -0,0 +1,191 @@
//===- IROutliner.h - Extract similar IR regions into functions ------------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// \file
// The interface file for the IROutliner which is used by the IROutliner Pass.
//
// The outliner uses the IRSimilarityIdentifier to identify the similar regions
// of code. It evaluates each set of IRSimilarityCandidates with an estimate of
// whether it will provide code size reduction. Each region is extracted using
// the code extractor. These extracted functions are consolidated into a single
// function and called from the extracted call site.
//
// For example:
// \code
// %1 = add i32 %a, %b
// %2 = add i32 %b, %a
// %3 = add i32 %b, %a
// %4 = add i32 %a, %b
// \endcode
// would become function
// \code
// define internal void outlined_ir_function(i32 %0, i32 %1) {
// %1 = add i32 %0, %1
// %2 = add i32 %1, %0
// ret void
// }
// \endcode
// with calls:
// \code
// call void outlined_ir_function(i32 %a, i32 %b)
// call void outlined_ir_function(i32 %b, i32 %a)
// \endcode
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_IPO_IROUTLINER_H
#define LLVM_TRANSFORMS_IPO_IROUTLINER_H
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/Transforms/Utils/CodeExtractor.h"
#include <set>
struct OutlinableGroup;
namespace llvm {
using namespace IRSimilarity;
class Module;
class TargetTransformInfo;
class OptimizationRemarkEmitter;
/// The OutlinableRegion holds all the information for a specific region, or
/// sequence of instructions. This includes what values need to be hoisted to
/// arguments from the extracted function, inputs and outputs to the region, and
/// mapping from the extracted function arguments to overall function arguments.
struct OutlinableRegion {
/// Describes the region of code.
IRSimilarityCandidate *Candidate;
/// If this region is outlined, the front and back IRInstructionData could
/// potentially become invalidated if the only new instruction is a call.
/// This ensures that we replace in the instruction in the IRInstructionData.
IRInstructionData *NewFront = nullptr;
IRInstructionData *NewBack = nullptr;
/// Used to create an outlined function.
CodeExtractor *CE = nullptr;
/// The call site of the extracted region.
CallInst *Call = nullptr;
/// The function for the extracted region.
Function *ExtractedFunction = nullptr;
/// Flag for whether we have split out the IRSimilarityCanidate. That is,
/// make the region contained the IRSimilarityCandidate its own BasicBlock.
bool CandidateSplit = false;
/// Flag for whether we should not consider this region for extraction.
bool IgnoreRegion = false;
/// The BasicBlock that is before the start of the region BasicBlock,
/// only defined when the region has been split.
BasicBlock *PrevBB = nullptr;
/// The BasicBlock that contains the starting instruction of the region.
BasicBlock *StartBB = nullptr;
/// The BasicBlock that contains the ending instruction of the region.
BasicBlock *EndBB = nullptr;
/// The BasicBlock that is after the start of the region BasicBlock,
/// only defined when the region has been split.
BasicBlock *FollowBB = nullptr;
/// The Outlinable Group that contains this region and structurally similar
/// regions to this region.
OutlinableGroup *Parent = nullptr;
OutlinableRegion(IRSimilarityCandidate &C, OutlinableGroup &Group)
: Candidate(&C), Parent(&Group) {
StartBB = C.getStartBB();
EndBB = C.getEndBB();
}
/// For the contained region, split the parent BasicBlock at the starting and
/// ending instructions of the contained IRSimilarityCandidate.
void splitCandidate();
/// For the contained region, reattach the BasicBlock at the starting and
/// ending instructions of the contained IRSimilarityCandidate, or if the
/// function has been extracted, the start and end of the BasicBlock
/// containing the called function.
void reattachCandidate();
};
/// This class is a pass that identifies similarity in a Module, extracts
/// instances of the similarity, and then consolidating the similar regions
/// in an effort to reduce code size. It uses the IRSimilarityIdentifier pass
/// to identify the similar regions of code, and then extracts the similar
/// sections into a single function. See the above for an example as to
/// how code is extracted and consolidated into a single function.
class IROutliner {
public:
IROutliner(function_ref<TargetTransformInfo &(Function &)> GTTI,
function_ref<IRSimilarityIdentifier &(Module &)> GIRSI)
: getTTI(GTTI), getIRSI(GIRSI) {}
bool run(Module &M);
private:
/// Find repeated similar code sequences in \p M and outline them into new
/// Functions.
///
/// \param [in] M - The module to outline from.
/// \returns The number of Functions created.
unsigned doOutline(Module &M);
/// Remove all the IRSimilarityCandidates from \p CandidateVec that have
/// instructions contained in a previously outlined region and put the
/// remaining regions in \p CurrentGroup.
///
/// \param [in] CandidateVec - List of similarity candidates for regions with
/// the same similarity structure.
/// \param [in,out] CurrentGroup - Contains the potential sections to
/// be outlined.
void
pruneIncompatibleRegions(std::vector<IRSimilarityCandidate> &CandidateVec,
OutlinableGroup &CurrentGroup);
/// Extract \p Region into its own function.
///
/// \param [in] Region - The region to be extracted into its own function.
/// \returns True if it was successfully outlined.
bool extractSection(OutlinableRegion &Region);
/// The set of outlined Instructions, identified by their location in the
/// sequential ordering of instructions in a Module.
DenseSet<unsigned> Outlined;
/// TargetTransformInfo lambda for target specific information.
function_ref<TargetTransformInfo &(Function &)> getTTI;
/// IRSimilarityIdentifier lambda to retrieve IRSimilarityIdentifier.
function_ref<IRSimilarityIdentifier &(Module &)> getIRSI;
/// The memory allocator used to allocate the CodeExtractors.
SpecificBumpPtrAllocator<CodeExtractor> ExtractorAllocator;
/// The memory allocator used to allocate the OutlinableRegions.
SpecificBumpPtrAllocator<OutlinableRegion> RegionAllocator;
/// The memory allocator used to allocate new IRInstructionData.
SpecificBumpPtrAllocator<IRInstructionData> InstDataAllocator;
};
/// Pass to outline similar regions.
class IROutlinerPass : public PassInfoMixin<IROutlinerPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_IROUTLINER_H

View File

@ -105,6 +105,7 @@
#include "llvm/Transforms/IPO/InferFunctionAttrs.h"
#include "llvm/Transforms/IPO/Inliner.h"
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/IROutliner.h"
#include "llvm/Transforms/IPO/LoopExtractor.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
@ -289,6 +290,7 @@ extern cl::opt<bool> EnableConstraintElimination;
extern cl::opt<bool> EnableGVNHoist;
extern cl::opt<bool> EnableGVNSink;
extern cl::opt<bool> EnableHotColdSplit;
extern cl::opt<bool> EnableIROutliner;
extern cl::opt<bool> EnableOrderFileInstrumentation;
extern cl::opt<bool> EnableCHR;
extern cl::opt<bool> EnableUnrollAndJam;
@ -1313,6 +1315,13 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
if (EnableHotColdSplit && !LTOPreLink)
MPM.addPass(HotColdSplittingPass());
// Search the code for similar regions of code. If enough similar regions can
// be found where extracting the regions into their own function will decrease
// the size of the program, we extract the regions, a deduplicate the
// structurally similar regions.
if (EnableIROutliner)
MPM.addPass(IROutlinerPass());
// Merge functions if requested.
if (PTO.MergeFunctions)
MPM.addPass(MergeFunctionsPass());

View File

@ -72,6 +72,7 @@ MODULE_PASS("instrprof", InstrProfiling())
MODULE_PASS("internalize", InternalizePass())
MODULE_PASS("invalidate<all>", InvalidateAllAnalysesPass())
MODULE_PASS("ipsccp", IPSCCPPass())
MODULE_PASS("iroutliner", IROutlinerPass())
MODULE_PASS("print-ir-similarity", IRSimilarityAnalysisPrinterPass(dbgs()))
MODULE_PASS("loop-extract", LoopExtractorPass())
MODULE_PASS("lowertypetests", LowerTypeTestsPass())

View File

@ -20,6 +20,7 @@ add_llvm_component_library(LLVMipo
GlobalSplit.cpp
HotColdSplitting.cpp
IPO.cpp
IROutliner.cpp
InferFunctionAttrs.cpp
InlineSimple.cpp
Inliner.cpp

View File

@ -36,6 +36,7 @@ void llvm::initializeIPO(PassRegistry &Registry) {
initializeGlobalOptLegacyPassPass(Registry);
initializeGlobalSplitPass(Registry);
initializeHotColdSplittingLegacyPassPass(Registry);
initializeIROutlinerLegacyPassPass(Registry);
initializeAlwaysInlinerLegacyPassPass(Registry);
initializeSimpleInlinerPass(Registry);
initializeInferFunctionAttrsLegacyPassPass(Registry);

View File

@ -0,0 +1,369 @@
//===- IROutliner.cpp -- Outline Similar Regions ----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
// Implementation for the IROutliner which is used by the IROutliner Pass.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/IROutliner.h"
#include "llvm/Analysis/IRSimilarityIdentifier.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/PassManager.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include <map>
#include <set>
#include <vector>
#define DEBUG_TYPE "iroutliner"
using namespace llvm;
using namespace IRSimilarity;
/// The OutlinableGroup holds all the overarching information for outlining
/// a set of regions that are structurally similar to one another, such as the
/// types of the overall function, the output blocks, the sets of stores needed
/// and a list of the different regions. This information is used in the
/// deduplication of extracted regions with the same structure.
struct OutlinableGroup {
/// The sections that could be outlined
std::vector<OutlinableRegion *> Regions;
/// Flag for whether we should not consider this group of OutlinableRegions
/// for extraction.
bool IgnoreGroup = false;
};
/// Move the contents of \p SourceBB to before the last instruction of \p
/// TargetBB.
/// \param SourceBB - the BasicBlock to pull Instructions from.
/// \param TargetBB - the BasicBlock to put Instruction into.
static void moveBBContents(BasicBlock &SourceBB, BasicBlock &TargetBB) {
BasicBlock::iterator BBCurr, BBEnd, BBNext;
for (BBCurr = SourceBB.begin(), BBEnd = SourceBB.end(); BBCurr != BBEnd;
BBCurr = BBNext) {
BBNext = std::next(BBCurr);
BBCurr->moveBefore(TargetBB, TargetBB.end());
}
}
void OutlinableRegion::splitCandidate() {
assert(!CandidateSplit && "Candidate already split!");
Instruction *StartInst = (*Candidate->begin()).Inst;
Instruction *EndInst = (*Candidate->end()).Inst;
assert(StartInst && EndInst && "Expected a start and end instruction?");
StartBB = StartInst->getParent();
PrevBB = StartBB;
// The basic block gets split like so:
// block: block:
// inst1 inst1
// inst2 inst2
// region1 br block_to_outline
// region2 block_to_outline:
// region3 -> region1
// region4 region2
// inst3 region3
// inst4 region4
// br block_after_outline
// block_after_outline:
// inst3
// inst4
std::string OriginalName = PrevBB->getName().str();
StartBB = PrevBB->splitBasicBlock(StartInst, OriginalName + "_to_outline");
// This is the case for the inner block since we do not have to include
// multiple blocks.
EndBB = StartBB;
FollowBB = EndBB->splitBasicBlock(EndInst, OriginalName + "_after_outline");
CandidateSplit = true;
}
void OutlinableRegion::reattachCandidate() {
assert(CandidateSplit && "Candidate is not split!");
// The basic block gets reattached like so:
// block: block:
// inst1 inst1
// inst2 inst2
// br block_to_outline region1
// block_to_outline: -> region2
// region1 region3
// region2 region4
// region3 inst3
// region4 inst4
// br block_after_outline
// block_after_outline:
// inst3
// inst4
assert(StartBB != nullptr && "StartBB for Candidate is not defined!");
assert(FollowBB != nullptr && "StartBB for Candidate is not defined!");
// StartBB should only have one predecessor since we put an unconditional
// branch at the end of PrevBB when we split the BasicBlock.
PrevBB = StartBB->getSinglePredecessor();
assert(PrevBB != nullptr &&
"No Predecessor for the region start basic block!");
assert(PrevBB->getTerminator() && "Terminator removed from PrevBB!");
assert(EndBB->getTerminator() && "Terminator removed from EndBB!");
PrevBB->getTerminator()->eraseFromParent();
EndBB->getTerminator()->eraseFromParent();
moveBBContents(*StartBB, *PrevBB);
BasicBlock *PlacementBB = PrevBB;
if (StartBB != EndBB)
PlacementBB = EndBB;
moveBBContents(*FollowBB, *PlacementBB);
PrevBB->replaceSuccessorsPhiUsesWith(StartBB, PrevBB);
PrevBB->replaceSuccessorsPhiUsesWith(FollowBB, PlacementBB);
StartBB->eraseFromParent();
FollowBB->eraseFromParent();
// Make sure to save changes back to the StartBB.
StartBB = PrevBB;
EndBB = nullptr;
PrevBB = nullptr;
FollowBB = nullptr;
CandidateSplit = false;
}
void IROutliner::pruneIncompatibleRegions(
std::vector<IRSimilarityCandidate> &CandidateVec,
OutlinableGroup &CurrentGroup) {
bool PreviouslyOutlined;
// Sort from beginning to end, so the IRSimilarityCandidates are in order.
stable_sort(CandidateVec, [](const IRSimilarityCandidate &LHS,
const IRSimilarityCandidate &RHS) {
return LHS.getStartIdx() < RHS.getStartIdx();
});
unsigned CurrentEndIdx = 0;
for (IRSimilarityCandidate &IRSC : CandidateVec) {
PreviouslyOutlined = false;
unsigned StartIdx = IRSC.getStartIdx();
unsigned EndIdx = IRSC.getEndIdx();
for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
if (Outlined.find(Idx) != Outlined.end()) {
PreviouslyOutlined = true;
break;
}
if (PreviouslyOutlined)
continue;
// TODO: If in the future we can outline across BasicBlocks, we will need to
// check all BasicBlocks contained in the region.
if (IRSC.getStartBB()->hasAddressTaken())
continue;
// Greedily prune out any regions that will overlap with already chosen
// regions.
if (CurrentEndIdx != 0 && StartIdx <= CurrentEndIdx)
continue;
OutlinableRegion *OS = new (RegionAllocator.Allocate())
OutlinableRegion(IRSC, CurrentGroup);
CurrentGroup.Regions.push_back(OS);
CurrentEndIdx = EndIdx;
}
}
bool IROutliner::extractSection(OutlinableRegion &Region) {
assert(Region.StartBB != nullptr &&
"StartBB for the OutlinableRegion is nullptr!");
assert(Region.FollowBB != nullptr &&
"StartBB for the OutlinableRegion is nullptr!");
Function *OrigF = Region.StartBB->getParent();
CodeExtractorAnalysisCache CEAC(*OrigF);
Region.ExtractedFunction = Region.CE->extractCodeRegion(CEAC);
// If the extraction was successful, find the BasicBlock, and reassign the
// OutlinableRegion blocks
if (!Region.ExtractedFunction) {
LLVM_DEBUG(dbgs() << "CodeExtractor failed to outline " << Region.StartBB
<< "\n");
Region.reattachCandidate();
return false;
}
BasicBlock *RewrittenBB = Region.FollowBB->getSinglePredecessor();
Region.StartBB = RewrittenBB;
Region.EndBB = RewrittenBB;
// The sequences of outlinable regions has now changed. We must fix the
// IRInstructionDataList for consistency. Although they may not be illegal
// instructions, they should not be compared with anything else as they
// should not be outlined in this round. So marking these as illegal is
// allowed.
IRInstructionDataList *IDL = Region.Candidate->front()->IDL;
Region.NewFront = new (InstDataAllocator.Allocate())
IRInstructionData(*RewrittenBB->begin(), false, *IDL);
Region.NewBack = new (InstDataAllocator.Allocate())
IRInstructionData(*std::prev(std::prev(RewrittenBB->end())), false, *IDL);
// Insert the first IRInstructionData of the new region in front of the
// first IRInstructionData of the IRSimilarityCandidate.
IDL->insert(Region.Candidate->begin(), *Region.NewFront);
// Insert the first IRInstructionData of the new region after the
// last IRInstructionData of the IRSimilarityCandidate.
IDL->insert(Region.Candidate->end(), *Region.NewBack);
// Remove the IRInstructionData from the IRSimilarityCandidate.
IDL->erase(Region.Candidate->begin(), std::prev(Region.Candidate->end()));
assert(RewrittenBB != nullptr &&
"Could not find a predecessor after extraction!");
// Iterate over the new set of instructions to find the new call
// instruction.
for (Instruction &I : *RewrittenBB)
if (CallInst *CI = dyn_cast<CallInst>(&I))
if (Region.ExtractedFunction == CI->getCalledFunction()) {
Region.Call = CI;
break;
}
Region.reattachCandidate();
return true;
}
unsigned IROutliner::doOutline(Module &M) {
// Find the possibile similarity sections.
IRSimilarityIdentifier &Identifier = getIRSI(M);
SimilarityGroupList &SimilarityCandidates = *Identifier.getSimilarity();
// Sort them by size of extracted sections
unsigned OutlinedFunctionNum = 0;
// If we only have one SimilarityGroup in SimilarityCandidates, we do not have
// to sort them by the potential number of instructions to be outlined
if (SimilarityCandidates.size() > 1)
llvm::stable_sort(SimilarityCandidates,
[](const std::vector<IRSimilarityCandidate> &LHS,
const std::vector<IRSimilarityCandidate> &RHS) {
return LHS[0].getLength() * LHS.size() >
RHS[0].getLength() * RHS.size();
});
// Iterate over the possible sets of similarity.
for (SimilarityGroup &CandidateVec : SimilarityCandidates) {
OutlinableGroup CurrentGroup;
// Remove entries that were previously outlined
pruneIncompatibleRegions(CandidateVec, CurrentGroup);
// We pruned the number of regions to 0 to 1, meaning that it's not worth
// trying to outlined since there is no compatible similar instance of this
// code.
if (CurrentGroup.Regions.size() < 2)
continue;
// Create a CodeExtractor for each outlinable region.
for (OutlinableRegion *OS : CurrentGroup.Regions) {
// Break the outlinable region out of its parent BasicBlock into its own
// BasicBlocks (see function implementation).
OS->splitCandidate();
std::vector<BasicBlock *> BE = {OS->StartBB};
OS->CE = new (ExtractorAllocator.Allocate())
CodeExtractor(BE, nullptr, false, nullptr, nullptr, nullptr, false,
false, "outlined");
}
// Create functions out of all the sections, and mark them as outlined
std::vector<OutlinableRegion *> OutlinedRegions;
for (OutlinableRegion *OS : CurrentGroup.Regions) {
OutlinedFunctionNum++;
bool FunctionOutlined = extractSection(*OS);
if (FunctionOutlined) {
unsigned StartIdx = OS->Candidate->getStartIdx();
unsigned EndIdx = OS->Candidate->getEndIdx();
for (unsigned Idx = StartIdx; Idx <= EndIdx; Idx++)
Outlined.insert(Idx);
OutlinedRegions.push_back(OS);
}
}
CurrentGroup.Regions = std::move(OutlinedRegions);
}
return OutlinedFunctionNum;
}
bool IROutliner::run(Module &M) { return doOutline(M) > 0; }
// Pass Manager Boilerplate
class IROutlinerLegacyPass : public ModulePass {
public:
static char ID;
IROutlinerLegacyPass() : ModulePass(ID) {
initializeIROutlinerLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.addRequired<TargetTransformInfoWrapperPass>();
AU.addRequired<IRSimilarityIdentifierWrapperPass>();
}
bool runOnModule(Module &M) override;
};
bool IROutlinerLegacyPass::runOnModule(Module &M) {
if (skipModule(M))
return false;
auto GTTI = [this](Function &F) -> TargetTransformInfo & {
return this->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
};
auto GIRSI = [this](Module &) -> IRSimilarityIdentifier & {
return this->getAnalysis<IRSimilarityIdentifierWrapperPass>().getIRSI();
};
return IROutliner(GTTI, GIRSI).run(M);
}
PreservedAnalyses IROutlinerPass::run(Module &M, ModuleAnalysisManager &AM) {
auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
std::function<TargetTransformInfo &(Function &)> GTTI =
[&FAM](Function &F) -> TargetTransformInfo & {
return FAM.getResult<TargetIRAnalysis>(F);
};
std::function<IRSimilarityIdentifier &(Module &)> GIRSI =
[&AM](Module &M) -> IRSimilarityIdentifier & {
return AM.getResult<IRSimilarityAnalysis>(M);
};
if (IROutliner(GTTI, GIRSI).run(M))
return PreservedAnalyses::none();
return PreservedAnalyses::all();
}
char IROutlinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
false)
INITIALIZE_PASS_DEPENDENCY(IRSimilarityIdentifierWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(IROutlinerLegacyPass, "iroutliner", "IR Outliner", false,
false)
ModulePass *llvm::createIROutlinerPass() { return new IROutlinerLegacyPass(); }

View File

@ -107,6 +107,9 @@ static cl::opt<bool>
cl::opt<bool> EnableHotColdSplit("hot-cold-split", cl::init(false),
cl::ZeroOrMore, cl::desc("Enable hot-cold splitting pass"));
cl::opt<bool> EnableIROutliner("ir-outliner", cl::init(false), cl::Hidden,
cl::desc("Enable ir outliner pass"));
static cl::opt<bool> UseLoopVersioningLICM(
"enable-loop-versioning-licm", cl::init(false), cl::Hidden,
cl::desc("Enable the experimental Loop Versioning LICM pass"));
@ -880,6 +883,9 @@ void PassManagerBuilder::populateModulePassManager(
if (EnableHotColdSplit && !(PrepareForLTO || PrepareForThinLTO))
MPM.add(createHotColdSplittingPass());
if (EnableIROutliner)
MPM.add(createIROutlinerPass());
if (MergeFunctions)
MPM.add(createMergeFunctionsPass());

View File

@ -0,0 +1,127 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; This test makes sure we are extracting the found similarity sections
; correctly at the call site.
define void @extract1() {
; CHECK-LABEL: @extract1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @extract1.outlined(i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
define void @extract2() {
; CHECK-LABEL: @extract2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @extract2.outlined(i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
define void @extract_outs1() #0 {
; CHECK-LABEL: @extract_outs1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @extract_outs1.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[OUTPUT]], align 4
; CHECK-NEXT: call void @extract_outs1.outlined.1(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%output = alloca i32, align 4
%result = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add i32 %0, %1
store i32 %add, i32* %output, align 4
%2 = load i32, i32* %output, align 4
%3 = load i32, i32* %output, align 4
%mul = mul i32 %2, %add
store i32 %mul, i32* %result, align 4
ret void
}
define void @extract_outs2() #0 {
; CHECK-LABEL: @extract_outs2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[DOTLOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[ADD_LOC:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[OUTPUT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[RESULT:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[LT_CAST:%.*]] = bitcast i32* [[ADD_LOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: [[LT_CAST1:%.*]] = bitcast i32* [[DOTLOC]] to i8*
; CHECK-NEXT: call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @extract_outs2.outlined(i32* [[A]], i32* [[B]], i32* [[OUTPUT]], i32* [[ADD_LOC]], i32* [[DOTLOC]])
; CHECK-NEXT: [[ADD_RELOAD:%.*]] = load i32, i32* [[ADD_LOC]], align 4
; CHECK-NEXT: [[DOTRELOAD:%.*]] = load i32, i32* [[DOTLOC]], align 4
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST]])
; CHECK-NEXT: call void @llvm.lifetime.end.p0i8(i64 -1, i8* [[LT_CAST1]])
; CHECK-NEXT: call void @extract_outs2.outlined.2(i32 [[DOTRELOAD]], i32 [[ADD_RELOAD]], i32* [[RESULT]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%output = alloca i32, align 4
%result = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
%0 = load i32, i32* %a, align 4
%1 = load i32, i32* %b, align 4
%add = add i32 %0, %1
store i32 %add, i32* %output, align 4
%2 = load i32, i32* %output, align 4
%mul = mul i32 %2, %add
store i32 %mul, i32* %result, align 4
ret void
}

View File

@ -0,0 +1,91 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; This test shows that we do not outline from basic blocks with their address
; taken.
@ba1 = constant i8* blockaddress (@dontoutline, %new_block)
define void @outline_1() {
; CHECK-LABEL: @outline_1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
define void @outline_2() {
; CHECK-LABEL: @outline_2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @[[FUNCTION_2:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
define void @dontoutline() {
; CHECK-LABEL: @dontoutline(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: br label [[NEW_BLOCK:%.*]]
; CHECK: new_block:
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
; CHECK-NEXT: store i32 4, i32* [[C]], align 4
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[A]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[B]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[C]], align 4
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
br label %new_block
new_block:
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
; CHECK: define internal void @[[FUNCTION_2]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
; CHECK: entry_to_outline:
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4

View File

@ -0,0 +1,68 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; This is a negative case to show that when we have the same set of
; instructions, but in a different order, they are not outlined in the same way.
; In this case, the arguments passed into the function are in a different order.
define void @outline_constants1() {
; CHECK-LABEL: @outline_constants1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
; CHECK-NEXT: store i32 4, i32* [[C]], align 4
; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[C]], i32* [[B]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%cl = load i32, i32* %c
%bl = load i32, i32* %b
ret void
}
define void @outline_constants2() {
; CHECK-LABEL: @outline_constants2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 2, i32* [[A]], align 4
; CHECK-NEXT: store i32 3, i32* [[B]], align 4
; CHECK-NEXT: store i32 4, i32* [[C]], align 4
; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
; CHECK: entry_to_outline:
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
; CHECK: entry_to_outline:
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4

View File

@ -0,0 +1,67 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
; This test looks at the constants in the regions, and if it they are the
; same it outlines them as constants rather than elevating them to arguments.
define void @outline_constants1() {
; CHECK-LABEL: @outline_constants1(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @[[FUNCTION_0:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
define void @outline_constants2() {
; CHECK-LABEL: @outline_constants2(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[A:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[B:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[C:%.*]] = alloca i32, align 4
; CHECK-NEXT: call void @[[FUNCTION_1:.*]](i32* [[A]], i32* [[B]], i32* [[C]])
; CHECK-NEXT: ret void
;
entry:
%a = alloca i32, align 4
%b = alloca i32, align 4
%c = alloca i32, align 4
store i32 2, i32* %a, align 4
store i32 3, i32* %b, align 4
store i32 4, i32* %c, align 4
%al = load i32, i32* %a
%bl = load i32, i32* %b
%cl = load i32, i32* %c
ret void
}
; CHECK: define internal void @[[FUNCTION_0]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
; CHECK: entry_to_outline:
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4
; CHECK: define internal void @[[FUNCTION_1]](i32* [[ARG0:%.*]], i32* [[ARG1:%.*]], i32* [[ARG2:%.*]])
; CHECK: entry_to_outline:
; CHECK-NEXT: store i32 2, i32* [[ARG0]], align 4
; CHECK-NEXT: store i32 3, i32* [[ARG1]], align 4
; CHECK-NEXT: store i32 4, i32* [[ARG2]], align 4
; CHECK-NEXT: [[AL:%.*]] = load i32, i32* [[ARG0]], align 4
; CHECK-NEXT: [[BL:%.*]] = load i32, i32* [[ARG1]], align 4
; CHECK-NEXT: [[CL:%.*]] = load i32, i32* [[ARG2]], align 4

View File

@ -0,0 +1,48 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -verify -iroutliner < %s | FileCheck %s
@global1 = global i32 1, align 4
@global2 = global i32 2, align 4
; This test looks at the globals in the regions, and if it they are the
; same it outlines the region without elevating the globals to arguments.
define void @outline_globals1() {
; CHECK-LABEL: @outline_globals1(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @[[FUNCTION_0:.*]]()
; CHECK-NEXT: ret void
;
entry:
%0 = load i32, i32* @global1
%1 = load i32, i32* @global2
%2 = add i32 %0, %1
ret void
}
define void @outline_globals2() {
; CHECK-LABEL: @outline_globals2(
; CHECK-NEXT: entry:
; CHECK-NEXT: call void @[[FUNCTION_1:.*]]()
; CHECK-NEXT: ret void
;
entry:
%0 = load i32, i32* @global1
%1 = load i32, i32* @global2
%2 = add i32 %0, %1
ret void
}
; CHECK: define internal void @[[FUNCTION_0]]()
; CHECK: entry_to_outline:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]
; CHECK: define internal void @[[FUNCTION_1]]()
; CHECK: entry_to_outline:
; CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* @global1, align 4
; CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* @global2, align 4
; CHECK-NEXT: [[ADD:%.*]] = add i32 [[TMP1]], [[TMP2]]