Heap allocation for new arrays.

This patch aims to implement the option of allocating new arrays created
by polly on heap instead of stack. To enable this option, a key named
'allocation' must be written in the imported json file with the value
'heap'.

We need such a feature because in a next iteration, we will implement a
mechanism of maximal static expansion which will need a way to allocate
arrays on heap. Indeed, the expansion is very costly in terms of memory
and doing the allocation on stack is not worth considering.

The malloc and the free are added respectively at polly.start and
polly.exiting such that there is no use-after-free (for instance in case
of Scop in a loop) and such that all memory cells allocated with a
malloc are free'd when we don't need them anymore.

We also add :

- In the class ScopArrayInfo, we add a boolean as member called IsOnHeap
  which represents the fact that the array in allocated on heap or not.
- A new branch in the method allocateNewArrays in the ISLNodeBuilder for
  the case of heap allocation. allocateNewArrays now takes a BBPair
  containing polly.start and polly.exiting. allocateNewArrays takes this
  two blocks and add the malloc and free calls respectively to
  polly.start and polly.exiting.
- As IntPtrTy for the malloc call, we use the DataLayout one.

To do that, we have modified :

- createScopArrayInfo and getOrCreateScopArrayInfo such that it returns
  a non-const SAI, in order to be able to call setIsOnHeap in the
  JSONImporter.
- executeScopConditionnaly such that it return both start block and end
  block of the scop, because we need this two blocs to be able to add
  the malloc and the free calls at the right position.

Differential Revision: https://reviews.llvm.org/D33688

llvm-svn: 306540
This commit is contained in:
Michael Kruse 2017-06-28 13:02:43 +00:00
parent 72d2539937
commit b738ffa845
9 changed files with 346 additions and 34 deletions

View File

@ -98,7 +98,7 @@ public:
void create(__isl_take isl_ast_node *Node);
/// Allocate memory for all new arrays created by Polly.
void allocateNewArrays();
void allocateNewArrays(BBPair StartExitBlocks);
/// Preload all memory loads that are invariant.
bool preloadInvariantLoads();

View File

@ -281,6 +281,9 @@ public:
/// Return the base pointer.
Value *getBasePtr() const { return BasePtr; }
// Set IsOnHeap to the value in parameter.
void setIsOnHeap(bool value) { IsOnHeap = value; }
/// For indirect accesses return the origin SAI of the BP, else null.
const ScopArrayInfo *getBasePtrOriginSAI() const { return BasePtrOriginSAI; }
@ -355,6 +358,12 @@ public:
/// Is this array info modeling an array?
bool isArrayKind() const { return Kind == MemoryKind::Array; }
/// Is this array allocated on heap
///
/// This property is only relevant if the array is allocated by Polly instead
/// of pre-existing. If false, it is allocated using alloca instead malloca.
bool isOnHeap() const { return IsOnHeap; }
/// Dump a readable representation to stderr.
void dump() const;
@ -412,6 +421,9 @@ private:
/// The isl id for the base pointer.
isl_id *Id;
/// True if the newly allocated array is on heap.
bool IsOnHeap;
/// The sizes of each dimension as SCEV*.
SmallVector<const SCEV *, 4> DimensionSizes;
@ -2593,8 +2605,7 @@ public:
/// @param ElementType The type of the elements stored in this array.
/// @param Kind The kind of the array info object.
/// @param BaseName The optional name of this memory reference.
const ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr,
Type *ElementType,
ScopArrayInfo *getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
ArrayRef<const SCEV *> Sizes,
MemoryKind Kind,
const char *BaseName = nullptr);
@ -2604,7 +2615,7 @@ public:
/// @param ElementType The type of the elements stored in this array.
/// @param BaseName The name of this memory reference.
/// @param Sizes The sizes of dimensions.
const ScopArrayInfo *createScopArrayInfo(Type *ElementType,
ScopArrayInfo *createScopArrayInfo(Type *ElementType,
const std::string &BaseName,
const std::vector<unsigned> &Sizes);

View File

@ -255,8 +255,8 @@ ScopArrayInfo::ScopArrayInfo(Value *BasePtr, Type *ElementType, isl_ctx *Ctx,
ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
const DataLayout &DL, Scop *S,
const char *BaseName)
: BasePtr(BasePtr), ElementType(ElementType), Kind(Kind), DL(DL), S(*S),
FAD(nullptr) {
: BasePtr(BasePtr), ElementType(ElementType), IsOnHeap(false), Kind(Kind),
DL(DL), S(*S), FAD(nullptr) {
std::string BasePtrName =
BaseName ? BaseName
: getIslCompatibleName("MemRef", BasePtr, S->getNextArrayIdx(),
@ -4083,9 +4083,9 @@ void Scop::canonicalizeDynamicBasePtrs() {
}
}
const ScopArrayInfo *
Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
ArrayRef<const SCEV *> Sizes, MemoryKind Kind,
ScopArrayInfo *Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
ArrayRef<const SCEV *> Sizes,
MemoryKind Kind,
const char *BaseName) {
assert((BasePtr || BaseName) &&
"BasePtr and BaseName can not be nullptr at the same time.");
@ -4107,8 +4107,8 @@ Scop::getOrCreateScopArrayInfo(Value *BasePtr, Type *ElementType,
return SAI.get();
}
const ScopArrayInfo *
Scop::createScopArrayInfo(Type *ElementType, const std::string &BaseName,
ScopArrayInfo *Scop::createScopArrayInfo(Type *ElementType,
const std::string &BaseName,
const std::vector<unsigned> &Sizes) {
auto *DimSizeType = Type::getInt64Ty(getSE()->getContext());
std::vector<const SCEV *> SCEVSizes;

View File

@ -187,7 +187,7 @@ static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
// All arrays must have their base pointers known before
// ScopAnnotator::buildAliasScopes.
NodeBuilder.allocateNewArrays();
NodeBuilder.allocateNewArrays(StartExitBlocks);
Annotator.buildAliasScopes(S);
if (PerfMonitoring) {
@ -232,7 +232,14 @@ static bool CodeGen(Scop &S, IslAstInfo &AI, LoopInfo &LI, DominatorTree &DT,
Value *RTC = NodeBuilder.createRTC(AI.getRunCondition());
Builder.GetInsertBlock()->getTerminator()->setOperand(0, RTC);
Builder.SetInsertPoint(&StartBlock->front());
// Explicitly set the insert point to the end of the block to avoid that a
// split at the builder's current
// insert position would move the malloc calls to the wrong BasicBlock.
// Ideally we would just split the block during allocation of the new
// arrays, but this would break the assumption that there are no blocks
// between polly.start and polly.exiting (at this point).
Builder.SetInsertPoint(StartBlock->getTerminator());
NodeBuilder.create(AstRoot);
NodeBuilder.finalize();

View File

@ -1383,7 +1383,7 @@ bool IslNodeBuilder::preloadInvariantEquivClass(
return true;
}
void IslNodeBuilder::allocateNewArrays() {
void IslNodeBuilder::allocateNewArrays(BBPair StartExitBlocks) {
for (auto &SAI : S.arrays()) {
if (SAI->getBasePtr())
continue;
@ -1393,6 +1393,9 @@ void IslNodeBuilder::allocateNewArrays() {
"created arrays that require memory allocation.");
Type *NewArrayType = nullptr;
// Get the size of the array = size(dim_1)*...*size(dim_n)
uint64_t ArraySizeInt = 1;
for (int i = SAI->getNumberOfDimensions() - 1; i >= 0; i--) {
auto *DimSize = SAI->getDimensionSize(i);
unsigned UnsignedDimSize = static_cast<const SCEVConstant *>(DimSize)
@ -1403,15 +1406,44 @@ void IslNodeBuilder::allocateNewArrays() {
NewArrayType = SAI->getElementType();
NewArrayType = ArrayType::get(NewArrayType, UnsignedDimSize);
ArraySizeInt *= UnsignedDimSize;
}
auto InstIt =
Builder.GetInsertBlock()->getParent()->getEntryBlock().getTerminator();
if (SAI->isOnHeap()) {
LLVMContext &Ctx = NewArrayType->getContext();
// Get the IntPtrTy from the Datalayout
auto IntPtrTy = DL.getIntPtrType(Ctx);
// Get the size of the element type in bits
unsigned Size = SAI->getElemSizeInBytes();
// Insert the malloc call at polly.start
auto InstIt = std::get<0>(StartExitBlocks)->getTerminator();
auto *CreatedArray = CallInst::CreateMalloc(
&*InstIt, IntPtrTy, SAI->getElementType(),
ConstantInt::get(Type::getInt64Ty(Ctx), Size),
ConstantInt::get(Type::getInt64Ty(Ctx), ArraySizeInt), nullptr,
SAI->getName());
SAI->setBasePtr(CreatedArray);
// Insert the free call at polly.exiting
CallInst::CreateFree(CreatedArray,
std::get<1>(StartExitBlocks)->getTerminator());
} else {
auto InstIt = Builder.GetInsertBlock()
->getParent()
->getEntryBlock()
.getTerminator();
auto *CreatedArray = new AllocaInst(NewArrayType, DL.getAllocaAddrSpace(),
SAI->getName(), &*InstIt);
CreatedArray->setAlignment(PollyTargetFirstLevelCacheLineSize);
SAI->setBasePtr(CreatedArray);
}
}
}
bool IslNodeBuilder::preloadInvariantLoads() {

View File

@ -694,25 +694,32 @@ bool JSONImporter::importArrays(Scop &S, Json::Value &JScop) {
}
for (; ArrayIdx < Arrays.size(); ArrayIdx++) {
auto *ElementType = parseTextType(Arrays[ArrayIdx]["type"].asCString(),
S.getSE()->getContext());
auto &Array = Arrays[ArrayIdx];
auto *ElementType =
parseTextType(Array["type"].asCString(), S.getSE()->getContext());
if (!ElementType) {
errs() << "Error while parsing element type for new array.\n";
return false;
}
std::vector<unsigned> DimSizes;
for (unsigned i = 0; i < Arrays[ArrayIdx]["sizes"].size(); i++) {
auto Size = std::stoi(Arrays[ArrayIdx]["sizes"][i].asCString());
for (unsigned i = 0; i < Array["sizes"].size(); i++) {
auto Size = std::stoi(Array["sizes"][i].asCString());
// Check if the size if positive.
if (Size <= 0) {
errs() << "The size at index " << i << " is =< 0.\n";
return false;
}
DimSizes.push_back(std::stoi(Arrays[ArrayIdx]["sizes"][i].asCString()));
DimSizes.push_back(Size);
}
auto NewSAI =
S.createScopArrayInfo(ElementType, Array["name"].asCString(), DimSizes);
if (Array.isMember("allocation")) {
NewSAI->setIsOnHeap(Array["allocation"].asString() == "heap");
}
S.createScopArrayInfo(ElementType, Arrays[ArrayIdx]["name"].asCString(),
DimSizes);
}
return true;

View File

@ -0,0 +1,113 @@
; RUN: opt %loadPolly -polly-scops -analyze -polly-import-jscop-dir=%S -polly-import-jscop -polly-import-jscop-postfix=transformed < %s | FileCheck %s
; RUN: opt %loadPolly -polly-import-jscop-dir=%S -polly-import-jscop -polly-import-jscop-postfix=transformed -polly-codegen -S < %s | FileCheck %s --check-prefix=CODEGEN
;
; #define Ni 1056
; #define Nj 1056
; #define Nk 1024
;
; void create_arrays_heap(double beta, double A[Ni][Nk], double B[Ni][Nj]) {
; int i,j,k;
;
; for (i = 0; i < Ni; i++) {
; for (j = 0; j < Nj; j++) {
; for (k = 0; k < Nk; ++k) {
; B[i][j] = beta * A[i][k];
; }
; }
; }
; }
;
; Check if the info from the JSON file has been analysed without errors.
; CHECK: Arrays {
; CHECK: double MemRef_A[*][1024]; // Element size 8
; CHECK: double MemRef_beta; // Element size 8
; CHECK: double MemRef_B[*][1056]; // Element size 8
; CHECK: double D[270336]; // Element size 8
; CHECK: double E[270336][200000]; // Element size 8
; CHECK: i64 F[270336]; // Element size 8
;
; Check if there are the 3 expected malloc calls with the right parameters at polly.start.
; %D : size(D) = product_all_dimensions*sizeof(type) = 270336*8 = 2162688 cast to double*
; %E : size(E) = 270336*200000*8 = 432537600000 cast to double*
; %F : size(F) = 270336*8 = 2162688 cast to i64*
; CODEGEN: polly.start:
; CODEGEN: %malloccall = tail call i8* @malloc(i64 2162688)
; CODEGEN: %D = bitcast i8* %malloccall to double*
; CODEGEN: %malloccall1 = tail call i8* @malloc(i64 432537600000)
; CODEGEN: %E = bitcast i8* %malloccall1 to double*
; CODEGEN: %malloccall2 = tail call i8* @malloc(i64 2162688)
; CODEGEN: %F = bitcast i8* %malloccall2 to i64*
;
; Check if there are the 3 expected malloc calls with the right parameters at polly.exiting.
; Cast to i8* before freeing because malloc give us a i8 and free is waiting for a i8*
; CODEGEN: polly.exiting:
; CODEGEN: %12 = bitcast double* %D to i8*
; CODEGEN: tail call void @free(i8* %12)
; CODEGEN: %13 = bitcast double* %E to i8*
; CODEGEN: tail call void @free(i8* %13)
; CODEGEN: %14 = bitcast i64* %F to i8*
; CODEGEN: tail call void @free(i8* %14)
;
; Check if the new access for array E is present.
; CODEGEN: %polly.access.mul.E = mul nsw i64 %polly.indvar, 200000
; CODEGEN: %polly.access.add.E = add nsw i64 %polly.access.mul.E, %
; CODEGEN: %polly.access.E = getelementptr double, double* %E, i64 %polly.access.add.E
;
; ModuleID = 'create_arrays_heap.ll'
;
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
; Function Attrs: nounwind uwtable
define void @create_arrays_heap(double %beta, [1024 x double]* nocapture readonly %A, [1056 x double]* nocapture %B) local_unnamed_addr {
entry:
br label %for.cond1.preheader
for.cond1.preheader: ; preds = %for.inc16, %entry
%indvars.iv35 = phi i64 [ 0, %entry ], [ %indvars.iv.next36, %for.inc16 ]
br label %for.cond4.preheader
for.cond4.preheader: ; preds = %for.inc13, %for.cond1.preheader
%indvars.iv32 = phi i64 [ 0, %for.cond1.preheader ], [ %indvars.iv.next33, %for.inc13 ]
%arrayidx12 = getelementptr inbounds [1056 x double], [1056 x double]* %B, i64 %indvars.iv35, i64 %indvars.iv32
br label %for.body6
for.body6: ; preds = %for.body6, %for.cond4.preheader
%indvars.iv = phi i64 [ 0, %for.cond4.preheader ], [ %indvars.iv.next.3, %for.body6 ]
%arrayidx8 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv
%0 = load double, double* %arrayidx8, align 8
%mul = fmul double %0, %beta
store double %mul, double* %arrayidx12, align 8
%indvars.iv.next = or i64 %indvars.iv, 1
%arrayidx8.1 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next
%1 = load double, double* %arrayidx8.1, align 8
%mul.1 = fmul double %1, %beta
store double %mul.1, double* %arrayidx12, align 8
%indvars.iv.next.1 = or i64 %indvars.iv, 2
%arrayidx8.2 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next.1
%2 = load double, double* %arrayidx8.2, align 8
%mul.2 = fmul double %2, %beta
store double %mul.2, double* %arrayidx12, align 8
%indvars.iv.next.2 = or i64 %indvars.iv, 3
%arrayidx8.3 = getelementptr inbounds [1024 x double], [1024 x double]* %A, i64 %indvars.iv35, i64 %indvars.iv.next.2
%3 = load double, double* %arrayidx8.3, align 8
%mul.3 = fmul double %3, %beta
store double %mul.3, double* %arrayidx12, align 8
%indvars.iv.next.3 = add nsw i64 %indvars.iv, 4
%exitcond.3 = icmp eq i64 %indvars.iv.next.3, 1024
br i1 %exitcond.3, label %for.inc13, label %for.body6
for.inc13: ; preds = %for.body6
%indvars.iv.next33 = add nuw nsw i64 %indvars.iv32, 1
%exitcond34 = icmp eq i64 %indvars.iv.next33, 1056
br i1 %exitcond34, label %for.inc16, label %for.cond4.preheader
for.inc16: ; preds = %for.inc13
%indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
%exitcond37 = icmp eq i64 %indvars.iv.next36, 1056
br i1 %exitcond37, label %for.end18, label %for.cond1.preheader
for.end18: ; preds = %for.inc16
ret void
}

View File

@ -0,0 +1,62 @@
{
"arrays" : [
{
"name" : "MemRef_A",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_B",
"sizes" : [ "*", "1056" ],
"type" : "double"
}
],
"context" : "{ : }",
"location" : "pure_c_main.c:11-16",
"name" : "%for.cond1.preheader---%for.end18",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 4i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_beta[] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 1 + 4i2] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 2 + 4i2] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_A[i0, 3 + 4i2] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
}
],
"domain" : "{ Stmt2[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 255 }",
"name" : "Stmt2",
"schedule" : "{ Stmt2[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}

View File

@ -0,0 +1,80 @@
{
"arrays" : [
{
"name" : "MemRef_A",
"sizes" : [ "*", "1024" ],
"type" : "double"
},
{
"name" : "MemRef_B",
"sizes" : [ "*", "1056" ],
"type" : "double"
},
{
"name" : "D",
"sizes" : [ "270336" ],
"type" : "double",
"allocation" : "heap"
},
{
"name" : "E",
"sizes" : [ "270336", "200000" ],
"type" : "double",
"allocation" : "heap"
},
{
"name" : "F",
"sizes" : [ "270336" ],
"type" : "i64",
"allocation" : "heap"
}
],
"context" : "{ : }",
"location" : "pure_c_main.c:11-16",
"name" : "%for.cond1.preheader---%for.end18",
"statements" : [
{
"accesses" : [
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 4i2] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_beta[] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 1 + 4i2] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 2 + 4i2] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
},
{
"kind" : "read",
"relation" : "{ Stmt2[i0, i1, i2] -> E[i0, 3 + 4i2] }"
},
{
"kind" : "write",
"relation" : "{ Stmt2[i0, i1, i2] -> MemRef_B[i0, i1] }"
}
],
"domain" : "{ Stmt2[i0, i1, i2] : 0 <= i0 <= 1055 and 0 <= i1 <= 1055 and 0 <= i2 <= 255 }",
"name" : "Stmt2",
"schedule" : "{ Stmt2[i0, i1, i2] -> [i0, i1, i2] }"
}
]
}