[Refactor] Generalize parallel code generation

  + Generalized function names and comments
    + Removed OpenMP (omp) from the names and comments
    + Use common names (non-OpenMP-specific) for runtime library call creation
      methods
  + Commented the parallel code generator and all its member functions
  + Refactored some values and methods

Differential Revision: http://reviews.llvm.org/D4990

llvm-svn: 219003
Johannes Doerfert 2014-10-03 19:10:13 +00:00
parent fae1dc8a12
commit 12b355a2ce
11 changed files with 270 additions and 231 deletions
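
For orientation, here is a minimal usage sketch of the generalized interface, pieced together from the CodeGeneration.cpp hunks below. The wrapper function emitParallelLoop and its parameter names are illustrative; only the ParallelLoopGenerator calls come from this patch.

// Sketch only: how a code generator is expected to drive the generalized
// ParallelLoopGenerator after this patch (mirrors codegenForOpenMP).
#include "polly/CodeGen/LoopGenerators.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/Pass.h"

using namespace llvm;
using namespace polly;

static Value *emitParallelLoop(PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
                               DominatorTree &DT, const DataLayout &DL,
                               Value *LB, Value *UB, Value *Stride,
                               SetVector<Value *> &UsedValues) {
  ParallelLoopGenerator::ValueToValueMapTy VMap;
  BasicBlock::iterator LoopBody;

  // The generator creates the subfunction, emits the runtime calls and fills
  // VMap with the subfunction-local copies of UsedValues.
  ParallelLoopGenerator ParGen(Builder, P, LI, DT, DL);
  Value *IV = ParGen.createParallelLoop(LB, UB, Stride, UsedValues, VMap,
                                        &LoopBody);

  // The caller then emits the loop body at LoopBody, replacing every use of a
  // value in UsedValues with its mapped copy (cf. updateWithValueMap).
  Builder.SetInsertPoint(LoopBody);
  return IV;
}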

View File

@ -13,10 +13,11 @@
//===----------------------------------------------------------------------===//
#ifndef POLLY_LOOP_GENERATORS_H
#define POLLY_LOOP_GENERATORS_H
#include "polly/CodeGen/IRBuilder.h"
#include "llvm/ADT/SetVector.h"
#include <map>
#include "polly/CodeGen/IRBuilder.h"
#include "llvm/IR/ValueMap.h"
#include "llvm/ADT/SetVector.h"
namespace llvm {
class Value;
@ -54,73 +55,160 @@ Value *createLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
ScopAnnotator *Annotator = NULL, bool Parallel = false,
bool UseGuard = true);
class OMPGenerator {
/// @brief The ParallelLoopGenerator allows to create parallelized loops
///
/// To parallelize a loop, we perform the following steps:
/// o Generate a subfunction which will hold the loop body.
/// o Create a struct to hold all outer values needed in the loop body.
/// o Create calls to a runtime library to achieve the actual parallelism.
/// These calls will spawn and join threads, define how the work (here the
/// iterations) are distributed between them and make sure each has access
/// to the struct holding all needed values.
///
/// At the moment we support only one parallel runtime, OpenMP.
///
/// If we parallelize the outer loop of the following loop nest,
///
/// S0;
/// for (int i = 0; i < N; i++)
/// for (int j = 0; j < M; j++)
/// S1(i, j);
/// S2;
///
/// we will generate the following code (with different runtime function names):
///
/// S0;
/// auto *values = storeValuesIntoStruct();
/// // Execute subfunction with multiple threads
/// spawn_threads(subfunction, values);
/// join_threads();
/// S2;
///
/// // This function is executed in parallel by different threads
/// void subfunction(values) {
/// while (auto *WorkItem = getWorkItem()) {
/// int LB = WorkItem.begin();
/// int UB = WorkItem.end();
/// for (int i = LB; i < UB; i++)
/// for (int j = 0; j < M; j++)
/// S1(i, j);
/// }
/// cleanup_thread();
/// }
class ParallelLoopGenerator {
public:
typedef std::map<Value *, Value *> ValueToValueMapTy;
using ValueToValueMapTy = llvm::ValueMap<Value *, Value *>;
OMPGenerator(PollyIRBuilder &Builder, Pass *P) : Builder(Builder), P(P) {}
/// @brief Create a parallel loop generator for the current function.
ParallelLoopGenerator(PollyIRBuilder &Builder, Pass *P, LoopInfo &LI,
DominatorTree &DT, const DataLayout &DL)
: Builder(Builder), P(P), LI(LI), DT(DT), DL(DL),
LongType(
Type::getIntNTy(Builder.getContext(), DL.getPointerSizeInBits())),
M(Builder.GetInsertBlock()->getParent()->getParent()) {}
/// @brief Create an OpenMP parallel loop.
/// @brief Create a parallel loop
///
///
/// @param LowerBound The starting value of the induction variable.
/// @param UpperBound The upper bound of the induction variable.
/// @param Stride The value by which the induction variable is
/// incremented.
/// @param LB The lower bound for the loop we parallelize.
/// @param UB The upper bound for the loop we parallelize.
/// @param Stride The stride of the loop we parallelize.
/// @param Values A set of LLVM-IR Values that should be available in
/// the new loop body.
/// @param VMap A map to allow outside access to the new versions of
/// the values in @p Values.
/// @param LoopBody A pointer to an iterator that is set to point to the
/// body of the created loop. It should be used to insert
/// instructions that form the actual loop body.
///
/// @param UsedValues A set of LLVM-IR Values that should be available to
/// the new loop body.
/// @param VMap This map is filled by createParallelLoop(). It
/// maps the values in UsedValues to Values through which
/// their content is available within the loop body.
/// @param LoopBody A pointer to an iterator that is set to point to the
/// body of the created loop. It should be used to insert
/// instructions that form the actual loop body.
///
/// @return Value* The newly created induction variable for this loop.
Value *createParallelLoop(Value *LowerBound, Value *UpperBound, Value *Stride,
SetVector<Value *> &UsedValues,
ValueToValueMapTy &VMap,
/// @return The newly created induction variable for this loop.
Value *createParallelLoop(Value *LB, Value *UB, Value *Stride,
SetVector<Value *> &Values, ValueToValueMapTy &VMap,
BasicBlock::iterator *LoopBody);
private:
/// @brief The IR builder we use to create instructions.
PollyIRBuilder &Builder;
/// @brief A pass pointer to update analysis information.
Pass *P;
IntegerType *getIntPtrTy();
Module *getModule();
/// @brief The loop info of the current function we need to update.
LoopInfo &LI;
void createCallParallelLoopStart(Value *SubFunction, Value *SubfunctionParam,
Value *NumberOfThreads, Value *LowerBound,
Value *UpperBound, Value *Stride);
Value *createCallLoopNext(Value *LowerBoundPtr, Value *UpperBoundPtr);
void createCallParallelEnd();
void createCallLoopEndNowait();
/// @brief The dominance tree of the current function we need to update.
DominatorTree &DT;
Value *loadValuesIntoStruct(SetVector<Value *> &Values);
void extractValuesFromStruct(SetVector<Value *> OldValues, Value *Struct,
ValueToValueMapTy &Map);
/// @brief The target layout to get the right size for types.
const DataLayout &DL;
/// @brief Create the OpenMP subfunction.
/// @brief The type of a "long" on this hardware used for backend calls.
Type *LongType;
/// @brief The current module
Module *M;
/// @brief Create a runtime library call to spawn the worker threads.
///
/// @param Stride The value by which the induction variable is
/// incremented.
/// @param Struct The structure that is used to make Values available to
/// the loop body.
/// @param UsedValues A set of LLVM-IR Values that should be available to
/// the new loop body.
/// @param VMap This map is filled by createSubfunction(). It
/// maps the values in UsedValues to Values through which
/// their content is available within the loop body.
/// @param SubFunction The newly created SubFunction is returned here.
///
/// @return Value* The newly created induction variable.
Value *createSubfunction(Value *Stride, Value *Struct,
SetVector<Value *> UsedValues,
ValueToValueMapTy &VMap, Function **SubFunction);
/// @param SubFn The subfunction which holds the loop body.
/// @param SubFnParam The parameter for the subfunction (basically the struct
/// filled with the outside values).
/// @param LB The lower bound for the loop we parallelize.
/// @param UB The upper bound for the loop we parallelize.
/// @param Stride The stride of the loop we parallelize.
void createCallSpawnThreads(Value *SubFn, Value *SubFnParam, Value *LB,
Value *UB, Value *Stride);
/// @brief Create the definition of the OpenMP subfunction.
Function *createSubfunctionDefinition();
/// @brief Create a runtime library call to join the worker threads.
void createCallJoinThreads();
/// @brief Create a runtime library call to get the next work item.
///
/// @param LBPtr A pointer value to store the work item begin in.
/// @param UBPtr A pointer value to store the work item end in.
///
/// @returns A true value if the work item is not empty.
Value *createCallGetWorkItem(Value *LBPtr, Value *UBPtr);
/// @brief Create a runtime library call to allow cleanup of the thread.
///
/// @note This function is called right before the thread will exit the
/// subfunction and only if the runtime system depends on it.
void createCallCleanupThread();
/// @brief Create a struct for all @p Values and store them in there.
///
/// @param Values The values which should be stored in the struct.
///
/// @return The created struct.
Value *storeValuesIntoStruct(SetVector<Value *> &Values);
/// @brief Extract all values from the @p Struct and construct the mapping.
///
/// @param Values The values which were stored in the struct.
/// @param Struct The struct holding all the values in @p Values.
/// @param VMap A map to associate every element of @p Values with the
/// new llvm value loaded from the @p Struct.
void extractValuesFromStruct(SetVector<Value *> Values, Value *Struct,
ValueToValueMapTy &VMap);
/// @brief Create the definition of the parallel subfunction.
Function *createSubFnDefinition();
/// @brief Create the parallel subfunction.
///
/// @param Stride The induction variable increment.
/// @param Struct A struct holding all values in @p Values.
/// @param Values A set of LLVM-IR Values that should be available in
/// the new loop body.
/// @param VMap A map to allow outside access to the new versions of
/// the values in @p Values.
/// @param SubFn The newly created subfunction is returned here.
///
/// @return The newly created induction variable.
Value *createSubFn(Value *Stride, Value *Struct,
SetVector<Value *> UsedValues, ValueToValueMapTy &VMap,
Function **SubFn);
};
} // end namespace polly
#endif
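
To connect the class comment's pseudocode with the runtime calls declared in LoopGenerators.cpp, here is a rough hand-written equivalent of the code the generator emits. It is not part of the patch; the GOMP prototypes are reconstructed from the declarations built in createCallSpawnThreads and friends (assumed, not checked against libgomp headers), and the Context, body and parallelLoop names are illustrative.

// Rough C++ equivalent of the IR emitted by ParallelLoopGenerator.
extern "C" {
void GOMP_parallel_loop_runtime_start(void (*SubFn)(void *), void *Data,
                                      long NumThreads, long Start, long End,
                                      long Stride);
bool GOMP_loop_runtime_next(long *LB, long *UB);
void GOMP_loop_end_nowait();
void GOMP_parallel_end();
}

struct Context { /* one field per value in UsedValues */ };
static long Stride = 1;
static void body(Context *Values, long I) { /* the original loop body */ }

// What createSubFn produces: ask the runtime for work items until none are
// left, run the loop body for each item, then clean up this thread.  (The IR
// version subtracts one from UB and compares with <=; see createSubFn.)
static void subfn(void *Param) {
  Context *Values = static_cast<Context *>(Param); // extractValuesFromStruct
  long LB, UB;
  while (GOMP_loop_runtime_next(&LB, &UB))
    for (long I = LB; I < UB; I += Stride)
      body(Values, I);
  GOMP_loop_end_nowait();
}

// What createParallelLoop emits around the original loop: pack the used
// values, spawn the workers (0 lets the runtime pick the thread count),
// take part in the work, then join.
static void parallelLoop(long LB, long UB) {
  Context Values;                               // storeValuesIntoStruct
  GOMP_parallel_loop_runtime_start(subfn, &Values, 0, LB, UB + 1, Stride);
  subfn(&Values);
  GOMP_parallel_end();
}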

View File

@ -246,6 +246,7 @@ private:
LoopInfo &LI;
ScalarEvolution &SE;
DominatorTree &DT;
const DataLayout &DL;
// The Builder specifies the current location to code generate at.
PollyIRBuilder &Builder;
@ -316,7 +317,7 @@ private:
/// @brief Update ClastVars and ValueMap according to a value map.
///
/// @param VMap A map from old to new values.
void updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap);
void updateWithValueMap(ParallelLoopGenerator::ValueToValueMapTy &VMap);
/// @brief Create an OpenMP parallel for loop.
///
@ -579,8 +580,8 @@ SetVector<Value *> ClastStmtCodeGen::getOMPValues(const clast_stmt *Body) {
return Values;
}
void
ClastStmtCodeGen::updateWithValueMap(OMPGenerator::ValueToValueMapTy &VMap) {
void ClastStmtCodeGen::updateWithValueMap(
ParallelLoopGenerator::ValueToValueMapTy &VMap) {
std::set<Value *> Inserted;
for (const auto &I : ClastVars) {
@ -611,8 +612,8 @@ void ClastStmtCodeGen::codegenForOpenMP(const clast_for *For) {
BasicBlock::iterator LoopBody;
IntegerType *IntPtrTy = getIntPtrTy();
SetVector<Value *> Values;
OMPGenerator::ValueToValueMapTy VMap;
OMPGenerator OMPGen(Builder, P);
ParallelLoopGenerator::ValueToValueMapTy VMap;
ParallelLoopGenerator OMPGen(Builder, P, LI, DT, DL);
Stride = Builder.getInt(APInt_from_MPZ(For->stride));
Stride = Builder.CreateSExtOrBitCast(Stride, IntPtrTy);
@ -1025,7 +1026,8 @@ void ClastStmtCodeGen::codegen(const clast_root *r) {
ClastStmtCodeGen::ClastStmtCodeGen(Scop *scop, PollyIRBuilder &B, Pass *P)
: S(scop), P(P), LI(P->getAnalysis<LoopInfo>()),
SE(P->getAnalysis<ScalarEvolution>()),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()), Builder(B),
DT(P->getAnalysis<DominatorTreeWrapperPass>().getDomTree()),
DL(P->getAnalysis<DataLayoutPass>().getDataLayout()), Builder(B),
ExpGen(Builder, ClastVars) {}
namespace {
@ -1074,9 +1076,11 @@ public:
AU.addRequired<ScopDetection>();
AU.addRequired<ScopInfo>();
AU.addRequired<DataLayoutPass>();
AU.addRequired<DataLayoutPass>();
AU.addRequired<LoopInfo>();
AU.addPreserved<CloogInfo>();
AU.addPreserved<DataLayoutPass>();
AU.addPreserved<Dependences>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTreeWrapperPass>();
@ -1103,6 +1107,7 @@ INITIALIZE_PASS_DEPENDENCY(CloogInfo);
INITIALIZE_PASS_DEPENDENCY(Dependences);
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass);
INITIALIZE_PASS_DEPENDENCY(RegionInfoPass);
INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
INITIALIZE_PASS_DEPENDENCY(ScalarEvolution);
INITIALIZE_PASS_DEPENDENCY(ScopDetection);
INITIALIZE_PASS_DEPENDENCY(DataLayoutPass);
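
On the CLooG code generation side the only functional change is the type of the value map, which is now the llvm::ValueMap defined by ParallelLoopGenerator. A condensed sketch of how such a map is consumed (the function name and the ClastVarMapT alias are illustrative; the real logic is ClastStmtCodeGen::updateWithValueMap above):

#include "llvm/IR/Value.h"
#include "llvm/IR/ValueMap.h"
#include <map>
#include <string>

using VMapT = llvm::ValueMap<llvm::Value *, llvm::Value *>;
// Clast variable name -> LLVM value computed in the parent function.
using ClastVarMapT = std::map<std::string, llvm::Value *>;

// After createParallelLoop, every value that was live into the loop body has a
// subfunction-local copy in VMap; the clast variables are redirected to it.
static void remapClastVars(const VMapT &VMap, ClastVarMapT &ClastVars) {
  for (auto &Entry : ClastVars)
    if (llvm::Value *NewV = VMap.lookup(Entry.second))
      Entry.second = NewV;
}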

View File

@ -7,8 +7,7 @@
//
//===----------------------------------------------------------------------===//
//
// This file contains functions to create scalar and OpenMP parallel loops
// as LLVM-IR.
// This file contains functions to create scalar and parallel loops as LLVM-IR.
//
//===----------------------------------------------------------------------===//
@ -138,61 +137,84 @@ Value *polly::createLoop(Value *LB, Value *UB, Value *Stride,
return IV;
}
void OMPGenerator::createCallParallelLoopStart(
Value *SubFunction, Value *SubfunctionParam, Value *NumberOfThreads,
Value *LowerBound, Value *UpperBound, Value *Stride) {
Module *M = getModule();
const char *Name = "GOMP_parallel_loop_runtime_start";
Value *ParallelLoopGenerator::createParallelLoop(
Value *LB, Value *UB, Value *Stride, SetVector<Value *> &UsedValues,
ValueToValueMapTy &Map, BasicBlock::iterator *LoopBody) {
Value *Struct, *IV, *SubFnParam;
Function *SubFn;
Struct = storeValuesIntoStruct(UsedValues);
BasicBlock::iterator BeforeLoop = Builder.GetInsertPoint();
IV = createSubFn(Stride, Struct, UsedValues, Map, &SubFn);
*LoopBody = Builder.GetInsertPoint();
Builder.SetInsertPoint(BeforeLoop);
SubFnParam = Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(),
"polly.par.userContext");
// Add one as the upper bound provided by openmp is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
UB = Builder.CreateAdd(UB, ConstantInt::get(LongType, 1));
// Tell the runtime we start a parallel loop
createCallSpawnThreads(SubFn, SubFnParam, LB, UB, Stride);
Builder.CreateCall(SubFn, SubFnParam);
createCallJoinThreads();
return IV;
}
void ParallelLoopGenerator::createCallSpawnThreads(Value *SubFn,
Value *SubFnParam, Value *LB,
Value *UB, Value *Stride) {
const std::string Name = "GOMP_parallel_loop_runtime_start";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
Type *LongTy = getIntPtrTy();
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
Type *Params[] = {PointerType::getUnqual(FunctionType::get(
Builder.getVoidTy(), Builder.getInt8PtrTy(), false)),
Builder.getInt8PtrTy(), Builder.getInt32Ty(), LongTy,
LongTy, LongTy};
Builder.getInt8PtrTy(), LongType, LongType, LongType,
LongType};
FunctionType *Ty = FunctionType::get(Builder.getVoidTy(), Params, false);
F = Function::Create(Ty, Linkage, Name, M);
}
Value *Args[] = {SubFunction, SubfunctionParam, NumberOfThreads,
LowerBound, UpperBound, Stride};
Value *NumberOfThreads = ConstantInt::get(LongType, 0);
Value *Args[] = {SubFn, SubFnParam, NumberOfThreads, LB, UB, Stride};
Builder.CreateCall(F, Args);
}
Value *OMPGenerator::createCallLoopNext(Value *LowerBoundPtr,
Value *UpperBoundPtr) {
Module *M = getModule();
const char *Name = "GOMP_loop_runtime_next";
Value *ParallelLoopGenerator::createCallGetWorkItem(Value *LBPtr,
Value *UBPtr) {
const std::string Name = "GOMP_loop_runtime_next";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
if (!F) {
Type *LongPtrTy = PointerType::getUnqual(getIntPtrTy());
GlobalValue::LinkageTypes Linkage = Function::ExternalLinkage;
Type *Params[] = {LongPtrTy, LongPtrTy};
Type *Params[] = {LongType->getPointerTo(), LongType->getPointerTo()};
FunctionType *Ty = FunctionType::get(Builder.getInt8Ty(), Params, false);
F = Function::Create(Ty, Linkage, Name, M);
}
Value *Args[] = {LowerBoundPtr, UpperBoundPtr};
Value *Args[] = {LBPtr, UBPtr};
Value *Return = Builder.CreateCall(F, Args);
Return = Builder.CreateICmpNE(
Return, Builder.CreateZExt(Builder.getFalse(), Return->getType()));
return Return;
}
void OMPGenerator::createCallParallelEnd() {
const char *Name = "GOMP_parallel_end";
Module *M = getModule();
void ParallelLoopGenerator::createCallJoinThreads() {
const std::string Name = "GOMP_parallel_end";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
@ -206,9 +228,9 @@ void OMPGenerator::createCallParallelEnd() {
Builder.CreateCall(F);
}
void OMPGenerator::createCallLoopEndNowait() {
const char *Name = "GOMP_loop_end_nowait";
Module *M = getModule();
void ParallelLoopGenerator::createCallCleanupThread() {
const std::string Name = "GOMP_loop_end_nowait";
Function *F = M->getFunction(Name);
// If F is not available, declare it.
@ -222,39 +244,32 @@ void OMPGenerator::createCallLoopEndNowait() {
Builder.CreateCall(F);
}
IntegerType *OMPGenerator::getIntPtrTy() {
return P->getAnalysis<DataLayoutPass>().getDataLayout().getIntPtrType(
Builder.getContext());
}
Module *OMPGenerator::getModule() {
return Builder.GetInsertBlock()->getParent()->getParent();
}
Function *OMPGenerator::createSubfunctionDefinition() {
Module *M = getModule();
Function *ParallelLoopGenerator::createSubFnDefinition() {
Function *F = Builder.GetInsertBlock()->getParent();
std::vector<Type *> Arguments(1, Builder.getInt8PtrTy());
FunctionType *FT = FunctionType::get(Builder.getVoidTy(), Arguments, false);
Function *FN = Function::Create(FT, Function::InternalLinkage,
F->getName() + ".omp_subfn", M);
Function *SubFn = Function::Create(FT, Function::InternalLinkage,
F->getName() + ".polly.subfn", M);
// Do not run any polly pass on the new function.
FN->addFnAttr(PollySkipFnAttr);
SubFn->addFnAttr(PollySkipFnAttr);
Function::arg_iterator AI = FN->arg_begin();
AI->setName("omp.userContext");
Function::arg_iterator AI = SubFn->arg_begin();
AI->setName("polly.par.userContext");
return FN;
return SubFn;
}
Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
std::vector<Type *> Members;
Value *
ParallelLoopGenerator::storeValuesIntoStruct(SetVector<Value *> &Values) {
SmallVector<Type *, 8> Members;
for (Value *V : Values)
Members.push_back(V->getType());
StructType *Ty = StructType::get(Builder.getContext(), Members);
Value *Struct = Builder.CreateAlloca(Ty, 0, "omp.userContext");
Value *Struct =
new AllocaInst(Ty, 0, "polly.par.userContext", Builder.GetInsertPoint());
for (unsigned i = 0; i < Values.size(); i++) {
Value *Address = Builder.CreateStructGEP(Struct, i);
@ -264,121 +279,79 @@ Value *OMPGenerator::loadValuesIntoStruct(SetVector<Value *> &Values) {
return Struct;
}
void OMPGenerator::extractValuesFromStruct(SetVector<Value *> OldValues,
Value *Struct,
ValueToValueMapTy &Map) {
void ParallelLoopGenerator::extractValuesFromStruct(
SetVector<Value *> OldValues, Value *Struct, ValueToValueMapTy &Map) {
for (unsigned i = 0; i < OldValues.size(); i++) {
Value *Address = Builder.CreateStructGEP(Struct, i);
Value *NewValue = Builder.CreateLoad(Address);
Map.insert(std::make_pair(OldValues[i], NewValue));
Map[OldValues[i]] = NewValue;
}
}
Value *OMPGenerator::createSubfunction(Value *Stride, Value *StructData,
SetVector<Value *> Data,
ValueToValueMapTy &Map,
Function **SubFunction) {
Function *FN = createSubfunctionDefinition();
BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *LoadIVBoundsBB,
*AfterBB;
Value *LowerBoundPtr, *UpperBoundPtr, *UserContext, *Ret1, *HasNextSchedule,
*LowerBound, *UpperBound, *IV;
Type *IntPtrTy = getIntPtrTy();
LLVMContext &Context = FN->getContext();
Value *ParallelLoopGenerator::createSubFn(Value *Stride, Value *StructData,
SetVector<Value *> Data,
ValueToValueMapTy &Map,
Function **SubFnPtr) {
BasicBlock *PrevBB, *HeaderBB, *ExitBB, *CheckNextBB, *PreHeaderBB, *AfterBB;
Value *LBPtr, *UBPtr, *UserContext, *Ret1, *HasNextSchedule, *LB, *UB, *IV;
Function *SubFn = createSubFnDefinition();
LLVMContext &Context = SubFn->getContext();
// Store the previous basic block.
PrevBB = Builder.GetInsertBlock();
// Create basic blocks.
HeaderBB = BasicBlock::Create(Context, "omp.setup", FN);
ExitBB = BasicBlock::Create(Context, "omp.exit", FN);
CheckNextBB = BasicBlock::Create(Context, "omp.checkNext", FN);
LoadIVBoundsBB = BasicBlock::Create(Context, "omp.loadIVBounds", FN);
HeaderBB = BasicBlock::Create(Context, "polly.par.setup", SubFn);
ExitBB = BasicBlock::Create(Context, "polly.par.exit", SubFn);
CheckNextBB = BasicBlock::Create(Context, "polly.par.checkNext", SubFn);
PreHeaderBB = BasicBlock::Create(Context, "polly.par.loadIVBounds", SubFn);
DominatorTree &DT = P->getAnalysis<DominatorTreeWrapperPass>().getDomTree();
DT.addNewBlock(HeaderBB, PrevBB);
DT.addNewBlock(ExitBB, HeaderBB);
DT.addNewBlock(CheckNextBB, HeaderBB);
DT.addNewBlock(LoadIVBoundsBB, HeaderBB);
DT.addNewBlock(PreHeaderBB, HeaderBB);
// Fill up basic block HeaderBB.
Builder.SetInsertPoint(HeaderBB);
LowerBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.lowerBoundPtr");
UpperBoundPtr = Builder.CreateAlloca(IntPtrTy, 0, "omp.upperBoundPtr");
UserContext = Builder.CreateBitCast(FN->arg_begin(), StructData->getType(),
"omp.userContext");
LBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.LBPtr");
UBPtr = Builder.CreateAlloca(LongType, 0, "polly.par.UBPtr");
UserContext = Builder.CreateBitCast(SubFn->arg_begin(), StructData->getType(),
"polly.par.userContext");
extractValuesFromStruct(Data, UserContext, Map);
Builder.CreateBr(CheckNextBB);
// Add code to check if another set of iterations will be executed.
Builder.SetInsertPoint(CheckNextBB);
Ret1 = createCallLoopNext(LowerBoundPtr, UpperBoundPtr);
Ret1 = createCallGetWorkItem(LBPtr, UBPtr);
HasNextSchedule = Builder.CreateTrunc(Ret1, Builder.getInt1Ty(),
"omp.hasNextScheduleBlock");
Builder.CreateCondBr(HasNextSchedule, LoadIVBoundsBB, ExitBB);
"polly.par.hasNextScheduleBlock");
Builder.CreateCondBr(HasNextSchedule, PreHeaderBB, ExitBB);
// Add code to load the iv bounds for this set of iterations.
Builder.SetInsertPoint(LoadIVBoundsBB);
LowerBound = Builder.CreateLoad(LowerBoundPtr, "omp.lowerBound");
UpperBound = Builder.CreateLoad(UpperBoundPtr, "omp.upperBound");
Builder.SetInsertPoint(PreHeaderBB);
LB = Builder.CreateLoad(LBPtr, "polly.par.LB");
UB = Builder.CreateLoad(UBPtr, "polly.par.UB");
// Subtract one as the upper bound provided by openmp is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
UpperBound = Builder.CreateSub(UpperBound, ConstantInt::get(IntPtrTy, 1),
"omp.upperBoundAdjusted");
UB = Builder.CreateSub(UB, ConstantInt::get(LongType, 1),
"polly.par.UBAdjusted");
Builder.CreateBr(CheckNextBB);
Builder.SetInsertPoint(--Builder.GetInsertPoint());
LoopInfo &LI = P->getAnalysis<LoopInfo>();
IV = createLoop(LowerBound, UpperBound, Stride, Builder, P, LI, DT, AfterBB,
IV = createLoop(LB, UB, Stride, Builder, P, LI, DT, AfterBB,
ICmpInst::ICMP_SLE, nullptr, true, /* UseGuard */ false);
BasicBlock::iterator LoopBody = Builder.GetInsertPoint();
Builder.SetInsertPoint(AfterBB->begin());
// Add code to terminate this openmp subfunction.
// Add code to terminate this subfunction.
Builder.SetInsertPoint(ExitBB);
createCallLoopEndNowait();
createCallCleanupThread();
Builder.CreateRetVoid();
Builder.SetInsertPoint(LoopBody);
*SubFunction = FN;
return IV;
}
Value *OMPGenerator::createParallelLoop(Value *LowerBound, Value *UpperBound,
Value *Stride,
SetVector<Value *> &Values,
ValueToValueMapTy &Map,
BasicBlock::iterator *LoopBody) {
Value *Struct, *IV, *SubfunctionParam, *NumberOfThreads;
Function *SubFunction;
Struct = loadValuesIntoStruct(Values);
BasicBlock::iterator PrevInsertPoint = Builder.GetInsertPoint();
IV = createSubfunction(Stride, Struct, Values, Map, &SubFunction);
*LoopBody = Builder.GetInsertPoint();
Builder.SetInsertPoint(PrevInsertPoint);
// Create call for GOMP_parallel_loop_runtime_start.
SubfunctionParam =
Builder.CreateBitCast(Struct, Builder.getInt8PtrTy(), "omp_data");
NumberOfThreads = Builder.getInt32(0);
// Add one as the upper bound provided by openmp is a < comparison
// whereas the codegenForSequential function creates a <= comparison.
UpperBound =
Builder.CreateAdd(UpperBound, ConstantInt::get(getIntPtrTy(), 1));
createCallParallelLoopStart(SubFunction, SubfunctionParam, NumberOfThreads,
LowerBound, UpperBound, Stride);
Builder.CreateCall(SubFunction, SubfunctionParam);
createCallParallelEnd();
*SubFnPtr = SubFn;
return IV;
}
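
One detail worth spelling out from the hunks above is the matching +1/-1 around the upper bound; a worked example with illustrative numbers:

// Worked example of the upper-bound adjustment (numbers are illustrative):
//
//   polly::createLoop / codegenForSequential:  for (i = 0; i <= 9; i += 1)
//
// GOMP expects an exclusive end, so createParallelLoop passes UB + 1 = 10 to
// GOMP_parallel_loop_runtime_start.  If GOMP_loop_runtime_next later hands a
// thread the half-open chunk [4, 8), createSubFn subtracts one again and emits
//
//   for (i = 4; i <= 7; i += 1)
//
// so both the sequential and the parallel path keep the <= comparison that
// polly::createLoop generates.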

View File

@ -1,32 +1,8 @@
; RUN: opt %loadPolly -basicaa -polly-codegen -enable-polly-openmp < %s -S | FileCheck %s
;/*
; * =============================================================================
; *
; * Filename: 20120330-argument-use.c
; *
; * Description: Polly OpenMP test case
; *
; * Test if the OpenMP subfunction uses the argument copy in
; * the OpenMP struct not the original one only available in
; * the original function.
; *
; * Run with -polly-codegen -enable-polly-openmp
; *
; * Author: Johannes Doerfert johannes@jdoerfert.de
; *
; * Created: 2012-03-30
; * Modified: 2012-03-30
; *
; * =============================================================================
; */
;
;void f(int * restrict A, int * restrict B, int n) {
; int i;
;
; for (i = 0; i < n; i++) {
; for (int i = 0; i < n; i++)
; A[i] = B[i] * 2;
; }
;}
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@ -58,11 +34,11 @@ for.end: ; preds = %for.cond
ret void
}
; CHECK: %omp.userContext1 = bitcast i8* %omp.userContext to { i32, i32*, i32* }*
; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 0
; CHECK: %polly.par.userContext[[NO:[0-9]*]] = bitcast i8* %polly.par.userContext to { i32, i32*, i32* }*
; CHECK: %0 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 0
; CHECK: %1 = load i32* %0
; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 1
; CHECK: %2 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 1
; CHECK: %3 = load i32** %2
; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %omp.userContext1, i32 0, i32 2
; CHECK: %4 = getelementptr inbounds { i32, i32*, i32* }* %polly.par.userContext[[NO]], i32 0, i32 2
; CHECK: %5 = load i32** %4

View File

@ -53,4 +53,4 @@ end: ; preds = %for.end, %entry
; CLOOG: Stmt_for_end(c2);
; CLOOG: }
; CHECK: @f.omp_subfn
; CHECK: @f.polly.subfn

View File

@ -31,4 +31,4 @@ for.end: ; preds = %for.body
ret void
}
; CHECK: %omp.userContext = alloca { float }
; CHECK: %polly.par.userContext = alloca { float }

View File

@ -32,4 +32,4 @@ for.end: ; preds = %for.body
ret void
}
; CHECK: %omp.userContext = alloca { float }
; CHECK: %polly.par.userContext = alloca { float }

View File

@ -49,9 +49,8 @@ entry:
call void @foo()
ret i32 0
}
; CHECK: getelementptr inbounds { [10 x float]* }* %omp.userContext, i32 0, i32 0
; CHECK: store [10 x float]* %A, [10 x float]** %0
; CHECK: %omp_data = bitcast { [10 x float]* }* %omp.userContext to i8*
; CHECK: inbounds { [10 x float]* }* %omp.userContext1, i32 0, i32 0
; CHECK: %[[V:[._a-zA-Z0-9]+]] = getelementptr inbounds { [10 x float]* }* %polly.par.userContext, i32 0, i32 0
; CHECK: store [10 x float]* %A, [10 x float]** %[[V]]
; CHECK: inbounds { [10 x float]* }* %polly.par.userContext{{[0-9]*}}, i32 0, i32 0
; CHECK: load [10 x float]**

View File

@ -1,8 +1,5 @@
; RUN: opt %loadPolly -polly-codegen < %s -enable-polly-openmp -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
;
; This test case implements the following code:
;
; for (i = 0; i < 1024; i++)
@ -10,6 +7,7 @@ target triple = "x86_64-unknown-linux-gnu"
;
; The problem is that 'param' is not referenced in any subscript or loop
; bound, but it must still be forwarded to the OpenMP subfunction.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
define void @foo(double %param, [1024 x double]* %A) {
entry:
@ -35,4 +33,4 @@ for.end:
ret void
}
; CHECK: omp_subfn
; CHECK: @foo.polly.subfn

View File

@ -78,15 +78,15 @@ entry:
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; CHECK: %omp.userContext = alloca { i32 }
; CHECK: getelementptr inbounds { i32 }* %omp.userContext, i32 0, i32 0
; CHECK: store i32 %polly.indvar, i32* %0
; CHECK: %omp_data = bitcast { i32 }* %omp.userContext to i8*
; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.omp_subfn, i8* %omp_data, i32 0, i32 0, i32 10, i32 1)
; CHECK: call void @loop_openmp.omp_subfn(i8* %omp_data)
; CHECK: %polly.par.userContext = alloca { i32 }
; CHECK: %[[NO:[._a-zA-Z0-9]*]] = getelementptr inbounds { i32 }* %polly.par.userContext, i32 0, i32 0
; CHECK: store i32 %polly.indvar, i32* %[[NO]]
; CHECK: %[[DATA:[._a-zA-Z0-9]*]] = bitcast { i32 }* %polly.par.userContext to i8*
; CHECK: call void @GOMP_parallel_loop_runtime_start(void (i8*)* @loop_openmp.polly.subfn, i8* %[[DATA]], i32 0, i32 0, i32 10, i32 1)
; CHECK: call void @loop_openmp.polly.subfn(i8* %[[DATA]])
; CHECK: call void @GOMP_parallel_end()
; Verify the new subfunction is annotated such that SCoP detection will skip it.
; CHECK: @loop_openmp.omp_subfn({{.*}}) [[ATTR:#[0-9]+]]
; CHECK: @loop_openmp.polly.subfn({{.*}}) [[ATTR:#[0-9]+]]
; CHECK: attributes [[ATTR]] = {{{[^\}]*}}polly.skip.fn{{[^\}]*}}}

View File

@ -100,6 +100,6 @@ entry:
declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
; CHECK: %omp.userContext = alloca {}
; CHECK: %omp.userContext1 = alloca { i32 }
; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca {}
; CHECK-DAG: %polly.par.userContext{{[0-9]*}} = alloca { i32 }