[OpenMP] Introduce the OpenMPOpt transformation pass

The OpenMPOpt pass is a CGSCC pass in which OpenMP specific
optimizations can reside.

The OpenMPOpt pass uses the OpenMPKinds.def file to identify runtime
calls and their uses. This allows targeted transformations and eases
their implementation.

This initial patch deduplicates `__kmpc_global_thread_num` and
`omp_get_thread_num` calls. We can also identify arguments that are
equivalent to the result of such a call and use them instead. Later we
can determine "gtid" arguments based on their use in kernel functions,
etc.

Reviewed By: JonChesterfield

Differential Revision: https://reviews.llvm.org/D69930
Johannes Doerfert 2019-11-06 23:20:06 -06:00
parent 72277ecd62
commit 9548b74a83
20 changed files with 594 additions and 1 deletion

View File

@@ -177,6 +177,8 @@ __OMP_RTL(__kmpc_end_serialized_parallel, false, Void, IdentPtr, Int32)
__OMP_RTL(omp_get_thread_num, false, Int32, )
__OMP_RTL(__last, false, Void, )
#undef __OMP_RTL
#undef OMP_RTL
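
As context for this hunk, OMPKinds.def is an X-macro file: each
__OMP_RTL(Name, IsVarArg, ReturnType, ArgTypes...) entry is routed through an
OMP_RTL(Enum, Name, IsVarArg, ReturnType, ...) macro that the includer
defines, which is how OpenMPOpt.cpp below builds its runtime-function table.
A minimal, hedged sketch of that consumption pattern (the real enum lives in
OMPConstants.h and may differ in detail):

// Sketch only: build an enumeration of known runtime functions by
// defining OMP_RTL before including the .def file. The entry added in
// this hunk contributes OMPRTL_omp_get_thread_num.
enum RuntimeFunction {
#define OMP_RTL(Enum, Name, IsVarArg, ReturnType, ...) Enum,
#include "llvm/Frontend/OpenMP/OMPKinds.def"
};
// The trailing __OMP_RTL(__last, ...) entry yields OMPRTL___last, which
// OpenMPOpt.cpp uses as the bound of its EnumeratedArray of runtime
// function descriptions.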

View File

@@ -71,6 +71,7 @@ void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry&);
void initializeAliasSetPrinterPass(PassRegistry&);
void initializeAlignmentFromAssumptionsPass(PassRegistry&);
void initializeAlwaysInlinerLegacyPassPass(PassRegistry&);
void initializeOpenMPOptLegacyPassPass(PassRegistry &);
void initializeArgPromotionPass(PassRegistry&);
void initializeAssumptionCacheTrackerPass(PassRegistry&);
void initializeAtomicExpandPass(PassRegistry&);

View File

@@ -71,6 +71,7 @@ namespace {
(void) llvm::createAggressiveDCEPass();
(void) llvm::createAggressiveInstCombinerPass();
(void) llvm::createBitTrackingDCEPass();
(void) llvm::createOpenMPOptLegacyPass();
(void) llvm::createArgumentPromotionPass();
(void) llvm::createAlignmentFromAssumptionsPass();
(void) llvm::createBasicAAWrapperPass();

View File

@@ -152,6 +152,10 @@ ModulePass *createDeadArgHackingPass();
///
Pass *createArgumentPromotionPass(unsigned maxElements = 3);
//===----------------------------------------------------------------------===//
/// createOpenMPOptLegacyPass - OpenMP specific optimizations.
Pass *createOpenMPOptLegacyPass();
//===----------------------------------------------------------------------===//
/// createIPConstantPropagationPass - This pass propagates constants from call
/// sites into the bodies of functions.

View File

@@ -0,0 +1,54 @@
//===- IPO/OpenMPOpt.h - Collection of OpenMP optimizations -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
#define LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/IR/PassManager.h"
namespace llvm {
namespace omp {
/// Helper to remember if the module contains OpenMP (runtime calls), to be used
/// foremost with containsOpenMP.
struct OpenMPInModule {
OpenMPInModule &operator=(bool Found) {
if (Found)
Value = OpenMPInModule::OpenMP::FOUND;
else
Value = OpenMPInModule::OpenMP::NOT_FOUND;
return *this;
}
bool isKnown() { return Value != OpenMP::UNKNOWN; }
operator bool() { return Value != OpenMP::NOT_FOUND; }
private:
enum class OpenMP { FOUND, NOT_FOUND, UNKNOWN } Value = OpenMP::UNKNOWN;
};
/// Helper to determine if \p M contains OpenMP (runtime calls).
bool containsOpenMP(Module &M, OpenMPInModule &OMPInModule);
} // namespace omp
/// OpenMP optimizations pass.
class OpenMPOptPass : public PassInfoMixin<OpenMPOptPass> {
/// Helper to remember if the module contains OpenMP (runtime calls).
omp::OpenMPInModule OMPInModule;
public:
PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR);
};
} // end namespace llvm
#endif // LLVM_TRANSFORMS_IPO_OPENMP_OPT_H
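
A hedged sketch of how this header is meant to be used, mirroring the two
passes added in OpenMPOpt.cpp below (the wrapper function name is
hypothetical, not part of this patch):

// Illustration only; mirrors the cheap containsOpenMP() early exit that
// both the new-PM and legacy passes perform.
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
using namespace llvm;

static bool maybeRunOpenMPOpts(Module &M) {
  omp::OpenMPInModule OMPInModule;          // caches FOUND/NOT_FOUND once known
  if (!omp::containsOpenMP(M, OMPInModule)) // scans for runtime declarations
    return false;                           // quick no-op without OpenMP
  // ... run the OpenMP specific optimizations on the module's SCCs ...
  return true;
}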

View File

@@ -134,6 +134,7 @@ void LTOCodeGenerator::initializeLTOPasses() {
initializeSimpleInlinerPass(R);
initializePruneEHPass(R);
initializeGlobalDCELegacyPassPass(R);
initializeOpenMPOptLegacyPassPass(R);
initializeArgPromotionPass(R);
initializeJumpThreadingPass(R);
initializeSROALegacyPassPass(R);

View File

@@ -87,6 +87,7 @@
#include "llvm/Transforms/IPO/Internalize.h"
#include "llvm/Transforms/IPO/LowerTypeTests.h"
#include "llvm/Transforms/IPO/MergeFunctions.h"
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/Transforms/IPO/PartialInlining.h"
#include "llvm/Transforms/IPO/SCCP.h"
#include "llvm/Transforms/IPO/SampleProfile.h"
@@ -837,6 +838,11 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
if (Level == OptimizationLevel::O3)
MainCGPipeline.addPass(ArgumentPromotionPass());
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (Level == OptimizationLevel::O2 || Level == OptimizationLevel::O3)
MainCGPipeline.addPass(OpenMPOptPass());
// Lastly, add the core function simplification pipeline nested inside the
// CGSCC walk.
MainCGPipeline.addPass(createCGSCCToFunctionPassAdaptor(

View File

@@ -109,6 +109,7 @@ CGSCC_PASS("argpromotion", ArgumentPromotionPass())
CGSCC_PASS("invalidate<all>", InvalidateAllAnalysesPass())
CGSCC_PASS("function-attrs", PostOrderFunctionAttrsPass())
CGSCC_PASS("inline", InlinerPass())
CGSCC_PASS("openmpopt", OpenMPOptPass())
CGSCC_PASS("no-op-cgscc", NoOpCGSCCPass())
#undef CGSCC_PASS
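
With the pass registered here under the pipeline name "openmpopt", it can be
requested explicitly (the tests below use `opt -passes=openmpopt`). A hedged
C++ sketch of doing the same programmatically with the standard PassBuilder
boilerplate; the driver function is hypothetical and error handling is kept
minimal:

// Sketch only: run OpenMPOptPass over every SCC of a module, roughly what
// `opt -passes=openmpopt` does.
#include "llvm/Analysis/CGSCCPassManager.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Passes/PassBuilder.h"
#include "llvm/Support/Error.h"
using namespace llvm;

static void runOpenMPOptPipeline(Module &M) {
  LoopAnalysisManager LAM;
  FunctionAnalysisManager FAM;
  CGSCCAnalysisManager CGAM;
  ModuleAnalysisManager MAM;
  PassBuilder PB;
  PB.registerModuleAnalyses(MAM);
  PB.registerCGSCCAnalyses(CGAM);
  PB.registerFunctionAnalyses(FAM);
  PB.registerLoopAnalyses(LAM);
  PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);

  ModulePassManager MPM;
  // "openmpopt" resolves to the CGSCC pass registered above, so nest it in
  // a cgscc(...) adaptor within the module pipeline.
  cantFail(PB.parsePassPipeline(MPM, "cgscc(openmpopt)"));
  MPM.run(M, MAM);
}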

View File

@@ -26,6 +26,7 @@ add_llvm_component_library(LLVMipo
LoopExtractor.cpp
LowerTypeTests.cpp
MergeFunctions.cpp
OpenMPOpt.cpp
PartialInlining.cpp
PassManagerBuilder.cpp
PruneEH.cpp

View File

@@ -23,6 +23,7 @@
using namespace llvm;
void llvm::initializeIPO(PassRegistry &Registry) {
initializeOpenMPOptLegacyPassPass(Registry);
initializeArgPromotionPass(Registry);
initializeCalledValuePropagationLegacyPassPass(Registry);
initializeConstantMergeLegacyPassPass(Registry);

View File

@@ -19,4 +19,4 @@ type = Library
name = IPO
parent = Transforms
library_name = ipo
-required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation
+required_libraries = AggressiveInstCombine Analysis BitReader BitWriter Core FrontendOpenMP InstCombine IRReader Linker Object ProfileData Scalar Support TransformUtils Vectorize Instrumentation

View File

@@ -0,0 +1,419 @@
//===-- IPO/OpenMPOpt.cpp - Collection of OpenMP specific optimizations ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// OpenMP specific optimizations:
//
// - Deduplication of runtime calls, e.g., omp_get_thread_num.
//
//===----------------------------------------------------------------------===//
#include "llvm/Transforms/IPO/OpenMPOpt.h"
#include "llvm/ADT/EnumeratedArray.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/CallGraphSCCPass.h"
#include "llvm/Frontend/OpenMP/OMPConstants.h"
#include "llvm/IR/CallSite.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/Utils/CallGraphUpdater.h"
using namespace llvm;
using namespace omp;
using namespace types;
#define DEBUG_TYPE "openmp-opt"
static cl::opt<bool> DisableOpenMPOptimizations(
"openmp-opt-disable", cl::ZeroOrMore,
cl::desc("Disable OpenMP specific optimizations."), cl::Hidden,
cl::init(false));
STATISTIC(NumOpenMPRuntimeCallsDeduplicated,
"Number of OpenMP runtime calls deduplicated");
STATISTIC(NumOpenMPRuntimeFunctionsIdentified,
"Number of OpenMP runtime functions identified");
STATISTIC(NumOpenMPRuntimeFunctionUsesIdentified,
"Number of OpenMP runtime function uses identified");
static constexpr auto TAG = "[" DEBUG_TYPE "]";
namespace {
struct OpenMPOpt {
OpenMPOpt(SmallPtrSetImpl<Function *> &SCC,
SmallPtrSetImpl<Function *> &ModuleSlice,
CallGraphUpdater &CGUpdater)
: M(*(*SCC.begin())->getParent()), SCC(SCC), ModuleSlice(ModuleSlice),
CGUpdater(CGUpdater) {
initializeTypes(M);
initializeRuntimeFunctions();
}
/// Generic information that describes a runtime function
struct RuntimeFunctionInfo {
/// The kind, as described by the RuntimeFunction enum.
RuntimeFunction Kind;
/// The name of the function.
StringRef Name;
/// Flag to indicate a variadic function.
bool IsVarArg;
/// The return type of the function.
Type *ReturnType;
/// The argument types of the function.
SmallVector<Type *, 8> ArgumentTypes;
/// The declaration if available.
Function *Declaration;
/// Uses of this runtime function per function containing the use.
DenseMap<Function *, SmallPtrSet<Use *, 16>> UsesMap;
/// Return the number of arguments (or the minimal number for variadic
/// functions).
size_t getNumArgs() const { return ArgumentTypes.size(); }
/// Run the callback \p CB on each use and forget the use if the result is
/// true. The callback will be fed the function in which the use was
/// encountered as second argument.
void foreachUse(function_ref<bool(Use &, Function &)> CB) {
SmallVector<Use *, 8> ToBeDeleted;
for (auto &It : UsesMap) {
ToBeDeleted.clear();
for (Use *U : It.second)
if (CB(*U, *It.first))
ToBeDeleted.push_back(U);
for (Use *U : ToBeDeleted)
It.second.erase(U);
}
}
};
/// Run all OpenMP optimizations on the underlying SCC/ModuleSlice.
bool run() {
bool Changed = false;
LLVM_DEBUG(dbgs() << TAG << "Run on SCC with " << SCC.size()
<< " functions in a slice with " << ModuleSlice.size()
<< " functions\n");
Changed |= deduplicateRuntimeCalls();
return Changed;
}
private:
/// Try to eliminate runtime calls by reusing existing ones.
bool deduplicateRuntimeCalls() {
bool Changed = false;
SmallSetVector<Value *, 16> GTIdArgs;
collectGlobalThreadIdArguments(GTIdArgs);
LLVM_DEBUG(dbgs() << TAG << "Found " << GTIdArgs.size()
<< " global thread ID arguments\n");
for (Function *F : SCC) {
Value *GTIdArg = nullptr;
for (Argument &Arg : F->args())
if (GTIdArgs.count(&Arg)) {
GTIdArg = &Arg;
break;
}
Changed |= deduplicateRuntimeCalls(
*F, RFIs[OMPRTL___kmpc_global_thread_num], GTIdArg);
Changed |= deduplicateRuntimeCalls(*F, RFIs[OMPRTL_omp_get_thread_num]);
}
return Changed;
}
/// Try to eliminate calls of \p RFI in \p F by reusing an existing one or
/// \p ReplVal if given.
bool deduplicateRuntimeCalls(Function &F, RuntimeFunctionInfo &RFI,
Value *ReplVal = nullptr) {
auto &Uses = RFI.UsesMap[&F];
if (Uses.size() + (ReplVal != nullptr) < 2)
return false;
LLVM_DEBUG(dbgs() << TAG << "Deduplicate " << Uses.size() << " uses of "
<< RFI.Name
<< (ReplVal ? " with an existing value\n" : "\n")
<< "\n");
assert(!ReplVal || (isa<Argument>(ReplVal) &&
cast<Argument>(ReplVal)->getParent() == &F) &&
"Unexpected replacement value!");
if (!ReplVal) {
for (Use *U : Uses)
if (CallInst *CI = getCallIfRegularCall(*U, &RFI)) {
CI->moveBefore(&*F.getEntryBlock().getFirstInsertionPt());
ReplVal = CI;
break;
}
if (!ReplVal)
return false;
}
bool Changed = false;
auto ReplaceAndDeleteCB = [&](Use &U, Function &Caller) {
CallInst *CI = getCallIfRegularCall(U, &RFI);
if (!CI || CI == ReplVal || &F != &Caller)
return false;
assert(CI->getCaller() == &F && "Unexpected call!");
CGUpdater.removeCallSite(*CI);
CI->replaceAllUsesWith(ReplVal);
CI->eraseFromParent();
++NumOpenMPRuntimeCallsDeduplicated;
Changed = true;
return true;
};
RFI.foreachUse(ReplaceAndDeleteCB);
return Changed;
}
/// Collect arguments that represent the global thread id in \p GTIdArgs.
void collectGlobalThreadIdArguments(SmallSetVector<Value *, 16> &GTIdArgs) {
// TODO: Below we basically perform a fixpoint iteration with a pessimistic
// initialization. We could define an AbstractAttribute instead and
// run the Attributor here once it can be run as an SCC pass.
// Helper to check the argument \p ArgNo at all call sites of \p F for
// a GTId.
auto CallArgOpIsGTId = [&](Function &F, unsigned ArgNo, CallInst &RefCI) {
if (!F.hasLocalLinkage())
return false;
for (Use &U : F.uses()) {
if (CallInst *CI = getCallIfRegularCall(U)) {
Value *ArgOp = CI->getArgOperand(ArgNo);
if (CI == &RefCI || GTIdArgs.count(ArgOp) ||
getCallIfRegularCall(*ArgOp,
&RFIs[OMPRTL___kmpc_global_thread_num]))
continue;
}
return false;
}
return true;
};
// Helper to identify uses of a GTId as GTId arguments.
auto AddUserArgs = [&](Value &GTId) {
for (Use &U : GTId.uses())
if (CallInst *CI = dyn_cast<CallInst>(U.getUser()))
if (CI->isArgOperand(&U))
if (Function *Callee = CI->getCalledFunction())
if (CallArgOpIsGTId(*Callee, U.getOperandNo(), *CI))
GTIdArgs.insert(Callee->getArg(U.getOperandNo()));
};
// The argument users of __kmpc_global_thread_num calls are GTIds.
RuntimeFunctionInfo &GlobThreadNumRFI =
RFIs[OMPRTL___kmpc_global_thread_num];
for (auto &It : GlobThreadNumRFI.UsesMap)
for (Use *U : It.second)
if (CallInst *CI = getCallIfRegularCall(*U, &GlobThreadNumRFI))
AddUserArgs(*CI);
// Transitively search for more arguments by looking at the users of the
// ones we know already. During the search the GTIdArgs vector is extended
// so we cannot cache the size nor can we use a range based for.
for (unsigned u = 0; u < GTIdArgs.size(); ++u)
AddUserArgs(*GTIdArgs[u]);
}
/// Return the call if \p U is a callee use in a regular call. If \p RFI is
/// given, the callee has to match it; otherwise nullptr is returned.
CallInst *getCallIfRegularCall(Use &U, RuntimeFunctionInfo *RFI = nullptr) {
CallInst *CI = dyn_cast<CallInst>(U.getUser());
if (CI && CI->isCallee(&U) && !CI->hasOperandBundles() &&
(!RFI || CI->getCalledFunction() == RFI->Declaration))
return CI;
return nullptr;
}
/// Return the call if \p V is a regular call. If \p RFI is given, the callee
/// has to match it; otherwise nullptr is returned.
CallInst *getCallIfRegularCall(Value &V, RuntimeFunctionInfo *RFI = nullptr) {
CallInst *CI = dyn_cast<CallInst>(&V);
if (CI && !CI->hasOperandBundles() &&
(!RFI || CI->getCalledFunction() == RFI->Declaration))
return CI;
return nullptr;
}
/// Helper to initialize all runtime function information for those defined in
/// OpenMPKinds.def.
void initializeRuntimeFunctions() {
// Helper to collect all uses of the declaration in the UsesMap.
auto CollectUses = [&](RuntimeFunctionInfo &RFI) {
unsigned NumUses = 0;
if (!RFI.Declaration)
return NumUses;
NumOpenMPRuntimeFunctionsIdentified += 1;
NumOpenMPRuntimeFunctionUsesIdentified += RFI.Declaration->getNumUses();
// TODO: We directly convert uses into proper calls and unknown uses.
for (Use &U : RFI.Declaration->uses()) {
if (Instruction *UserI = dyn_cast<Instruction>(U.getUser())) {
if (ModuleSlice.count(UserI->getFunction())) {
RFI.UsesMap[UserI->getFunction()].insert(&U);
++NumUses;
}
} else {
RFI.UsesMap[nullptr].insert(&U);
++NumUses;
}
}
return NumUses;
};
#define OMP_RTL(_Enum, _Name, _IsVarArg, _ReturnType, ...) \
{ \
auto &RFI = RFIs[_Enum]; \
RFI.Kind = _Enum; \
RFI.Name = _Name; \
RFI.IsVarArg = _IsVarArg; \
RFI.ReturnType = _ReturnType; \
RFI.ArgumentTypes = SmallVector<Type *, 8>({__VA_ARGS__}); \
RFI.Declaration = M.getFunction(_Name); \
unsigned NumUses = CollectUses(RFI); \
(void)NumUses; \
LLVM_DEBUG({ \
dbgs() << TAG << RFI.Name << (RFI.Declaration ? "" : " not") \
<< " found\n"; \
if (RFI.Declaration) \
dbgs() << TAG << "-> got " << NumUses << " uses in " \
<< RFI.UsesMap.size() << " different functions.\n"; \
}); \
}
#include "llvm/Frontend/OpenMP/OMPKinds.def"
// TODO: We should validate the declaration against the types we expect.
// TODO: We should attach the attributes defined in OMPKinds.def.
}
/// The underlying module.
Module &M;
/// The SCC we are operating on.
SmallPtrSetImpl<Function *> &SCC;
/// The slice of the module we are allowed to look at.
SmallPtrSetImpl<Function *> &ModuleSlice;
/// Callback to update the call graph; the first argument is a removed call,
/// the second an optional replacement call.
CallGraphUpdater &CGUpdater;
/// Map from runtime function kind to the runtime function description.
EnumeratedArray<RuntimeFunctionInfo, RuntimeFunction,
RuntimeFunction::OMPRTL___last>
RFIs;
};
} // namespace
PreservedAnalyses OpenMPOptPass::run(LazyCallGraph::SCC &C,
CGSCCAnalysisManager &AM,
LazyCallGraph &CG, CGSCCUpdateResult &UR) {
if (!containsOpenMP(*C.begin()->getFunction().getParent(), OMPInModule))
return PreservedAnalyses::all();
if (DisableOpenMPOptimizations)
return PreservedAnalyses::all();
SmallPtrSet<Function *, 16> SCC;
for (LazyCallGraph::Node &N : C)
SCC.insert(&N.getFunction());
if (SCC.empty())
return PreservedAnalyses::all();
CallGraphUpdater CGUpdater;
CGUpdater.initialize(CG, C, AM, UR);
// TODO: Compute the module slice we are allowed to look at.
OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
bool Changed = OMPOpt.run();
(void)Changed;
return PreservedAnalyses::all();
}
namespace {
struct OpenMPOptLegacyPass : public CallGraphSCCPass {
CallGraphUpdater CGUpdater;
OpenMPInModule OMPInModule;
static char ID;
OpenMPOptLegacyPass() : CallGraphSCCPass(ID) {
initializeOpenMPOptLegacyPassPass(*PassRegistry::getPassRegistry());
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
CallGraphSCCPass::getAnalysisUsage(AU);
}
bool doInitialization(CallGraph &CG) override {
// Disable the pass if there is no OpenMP (runtime call) in the module.
containsOpenMP(CG.getModule(), OMPInModule);
return false;
}
bool runOnSCC(CallGraphSCC &CGSCC) override {
if (!containsOpenMP(CGSCC.getCallGraph().getModule(), OMPInModule))
return false;
if (DisableOpenMPOptimizations || skipSCC(CGSCC))
return false;
SmallPtrSet<Function *, 16> SCC;
for (CallGraphNode *CGN : CGSCC)
if (Function *Fn = CGN->getFunction())
if (!Fn->isDeclaration())
SCC.insert(Fn);
if (SCC.empty())
return false;
CallGraph &CG = getAnalysis<CallGraphWrapperPass>().getCallGraph();
CGUpdater.initialize(CG, CGSCC);
// TODO: Compute the module slice we are allowed to look at.
OpenMPOpt OMPOpt(SCC, SCC, CGUpdater);
return OMPOpt.run();
}
bool doFinalization(CallGraph &CG) override { return CGUpdater.finalize(); }
};
} // end anonymous namespace
bool llvm::omp::containsOpenMP(Module &M, OpenMPInModule &OMPInModule) {
if (OMPInModule.isKnown())
return OMPInModule;
#define OMP_RTL(_Enum, _Name, ...) \
if (M.getFunction(_Name)) \
return OMPInModule = true;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
return OMPInModule = false;
}
char OpenMPOptLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(OpenMPOptLegacyPass, "openmpopt",
"OpenMP specific optimizations", false, false)
INITIALIZE_PASS_DEPENDENCY(CallGraphWrapperPass)
INITIALIZE_PASS_END(OpenMPOptLegacyPass, "openmpopt",
"OpenMP specific optimizations", false, false)
Pass *llvm::createOpenMPOptLegacyPass() { return new OpenMPOptLegacyPass(); }

View File

@@ -599,6 +599,11 @@ void PassManagerBuilder::populateModulePassManager(
RunInliner = true;
}
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (OptLevel > 1)
MPM.add(createOpenMPOptLegacyPass());
MPM.add(createPostOrderFunctionAttrsLegacyPass());
if (OptLevel > 2)
MPM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args
@@ -930,6 +935,11 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) {
// CSFDO instrumentation and use pass.
addPGOInstrPasses(PM, /* IsCS */ true);
// Try to perform OpenMP specific optimizations. This is a (quick!) no-op if
// there are no OpenMP runtime calls present in the module.
if (OptLevel > 1)
PM.add(createOpenMPOptLegacyPass());
// Optimize globals again if we ran the inliner.
if (RunInliner)
PM.add(createGlobalOptimizerPass());
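
For completeness, a hedged sketch of driving the new legacy pass directly
through llvm/Transforms/IPO.h, outside of the -O2/LTO pipelines configured
above (the driver function is hypothetical):

// Illustration only: run just the legacy OpenMPOpt pass on a module. The
// pass is a quick no-op when no OpenMP runtime calls are present.
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/IPO.h"
using namespace llvm;

static void runLegacyOpenMPOpt(Module &M) {
  legacy::PassManager PM;
  PM.add(createOpenMPOptLegacyPass()); // legacy PM schedules the CallGraph dependency
  PM.run(M);
}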

View File

@@ -142,6 +142,7 @@
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: SROA

View File

@@ -107,6 +107,7 @@
; CHECK-O-NEXT: Running pass: InlinerPass
; CHECK-O-NEXT: Running pass: PostOrderFunctionAttrsPass
; CHECK-O3-NEXT: Running pass: ArgumentPromotionPass
; CHECK-O-NEXT: Running pass: OpenMPOptPass on (foo)
; CHECK-O-NEXT: Running pass: CGSCCToFunctionPassAdaptor<{{.*}}PassManager{{.*}}>
; CHECK-O-NEXT: Starting llvm::Function pass manager run.
; CHECK-O-NEXT: Running pass: SROA

View File

@@ -59,6 +59,7 @@
; CHECK-NEXT: Call Graph SCC Pass Manager
; CHECK-NEXT: Remove unused exception handling info
; CHECK-NEXT: Function Integration/Inlining
; CHECK-NEXT: OpenMP specific optimizations
; CHECK-NEXT: Deduce function attributes
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction

View File

@@ -62,6 +62,7 @@
; CHECK-NEXT: Call Graph SCC Pass Manager
; CHECK-NEXT: Remove unused exception handling info
; CHECK-NEXT: Function Integration/Inlining
; CHECK-NEXT: OpenMP specific optimizations
; CHECK-NEXT: Deduce function attributes
; CHECK-NEXT: Promote 'by reference' arguments to scalars
; CHECK-NEXT: FunctionPass Manager

View File

@@ -59,6 +59,7 @@
; CHECK-NEXT: Call Graph SCC Pass Manager
; CHECK-NEXT: Remove unused exception handling info
; CHECK-NEXT: Function Integration/Inlining
; CHECK-NEXT: OpenMP specific optimizations
; CHECK-NEXT: Deduce function attributes
; CHECK-NEXT: FunctionPass Manager
; CHECK-NEXT: Dominator Tree Construction

View File

@@ -46,6 +46,7 @@
; CHECK-O2-NEXT: Call Graph SCC Pass Manager
; CHECK-O2-NEXT: Remove unused exception handling info
; CHECK-O2-NEXT: Function Integration/Inlining
; CHECK-O2-NEXT: OpenMP specific optimizations
; CHECK-O2-NEXT: Deduce function attributes
; Next up is the main function pass pipeline. It shouldn't be split up and
; should contain the main loop pass pipeline as well.

View File

@@ -0,0 +1,86 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature
; RUN: opt -openmpopt -S < %s | FileCheck %s
; RUN: opt -passes=openmpopt -S < %s | FileCheck %s
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
%struct.ident_t = type { i32, i32, i32, i32, i8* }
@0 = private unnamed_addr global %struct.ident_t { i32 0, i32 34, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str, i32 0, i32 0) }, align 8
@.str = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
declare i32 @__kmpc_global_thread_num(%struct.ident_t*)
declare void @useI32(i32)
define void @external(i1 %c) {
; CHECK-LABEL: define {{[^@]+}}@external
; CHECK-SAME: (i1 [[C:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C2:%.*]] = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
; CHECK: t:
; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]])
; CHECK-NEXT: call void @useI32(i32 [[C2]])
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: e:
; CHECK-NEXT: call void @internal(i32 [[C2]], i32 [[C2]])
; CHECK-NEXT: call void @useI32(i32 [[C2]])
; CHECK-NEXT: br label [[M]]
; CHECK: m:
; CHECK-NEXT: call void @internal(i32 0, i32 [[C2]])
; CHECK-NEXT: call void @useI32(i32 [[C2]])
; CHECK-NEXT: ret void
;
entry:
br i1 %c, label %t, label %e
t:
%c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
call void @internal(i32 %c0, i32 %c0)
call void @useI32(i32 %c0)
br label %m
e:
%c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
call void @internal(i32 %c1, i32 %c1)
call void @useI32(i32 %c1)
br label %m
m:
%c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
call void @internal(i32 0, i32 %c2)
call void @useI32(i32 %c2)
ret void
}
define internal void @internal(i32 %not_gtid, i32 %gtid) {
; CHECK-LABEL: define {{[^@]+}}@internal
; CHECK-SAME: (i32 [[NOT_GTID:%.*]], i32 [[GTID:%.*]])
; CHECK-NEXT: entry:
; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[GTID]], [[GTID]]
; CHECK-NEXT: br i1 [[C]], label [[T:%.*]], label [[E:%.*]]
; CHECK: t:
; CHECK-NEXT: call void @useI32(i32 [[GTID]])
; CHECK-NEXT: call void @external(i1 [[C]])
; CHECK-NEXT: br label [[M:%.*]]
; CHECK: e:
; CHECK-NEXT: call void @useI32(i32 [[GTID]])
; CHECK-NEXT: br label [[M]]
; CHECK: m:
; CHECK-NEXT: call void @useI32(i32 [[GTID]])
; CHECK-NEXT: ret void
;
entry:
%cc = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
%c = icmp eq i32 %cc, %gtid
br i1 %c, label %t, label %e
t:
%c0 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
call void @useI32(i32 %c0)
call void @external(i1 %c)
br label %m
e:
%c1 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
call void @useI32(i32 %c1)
br label %m
m:
%c2 = tail call i32 @__kmpc_global_thread_num(%struct.ident_t* nonnull @0)
call void @useI32(i32 %c2)
ret void
}