[GPGPU] Ensure arrays where only parts are modified are copied to GPU

To do so we change the way array exents are computed. Instead of the precise
set of memory locations accessed, we now compute the extent as the range between
minimal and maximal address in the first dimension and the full extent defined
by the sizes of the inner array dimensions.

We also move the computation of the may_persist region after the construction
of the arrays, as it relies on array information. Without arrays being
constructed no useful information is computed at all.

llvm-svn: 278212
This commit is contained in:
Tobias Grosser 2016-08-10 10:58:19 +00:00
parent 85c7ea86ae
commit d58acf866a
3 changed files with 106 additions and 5 deletions

View File

@ -1700,21 +1700,78 @@ public:
/// Derive the extent of an array.
///
/// The extent of an array is defined by the set of memory locations for
/// which a memory access in the iteration domain exists.
/// The extent of an array is the set of elements that are within the
/// accessed array. For the inner dimensions, the extent constraints are
/// 0 and the size of the corresponding array dimension. For the first
/// (outermost) dimension, the extent constraints are the minimal and maximal
/// subscript value for the first dimension.
///
/// @param Array The array to derive the extent for.
///
/// @returns An isl_set describing the extent of the array.
__isl_give isl_set *getExtent(ScopArrayInfo *Array) {
unsigned NumDims = Array->getNumberOfDimensions();
isl_union_map *Accesses = S->getAccesses();
Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
Accesses = isl_union_map_detect_equalities(Accesses);
isl_union_set *AccessUSet = isl_union_map_range(Accesses);
AccessUSet = isl_union_set_coalesce(AccessUSet);
AccessUSet = isl_union_set_detect_equalities(AccessUSet);
AccessUSet = isl_union_set_coalesce(AccessUSet);
if (isl_union_set_is_empty(AccessUSet)) {
isl_union_set_free(AccessUSet);
return isl_set_empty(Array->getSpace());
}
if (Array->getNumberOfDimensions() == 0) {
isl_union_set_free(AccessUSet);
return isl_set_universe(Array->getSpace());
}
isl_set *AccessSet =
isl_union_set_extract_set(AccessUSet, Array->getSpace());
isl_union_set_free(AccessUSet);
return AccessSet;
isl_union_set_free(AccessUSet);
isl_local_space *LS = isl_local_space_from_space(Array->getSpace());
isl_pw_aff *Val =
isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0));
isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);
isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);
OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,
isl_pw_aff_dim(Val, isl_dim_in));
OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,
isl_pw_aff_dim(Val, isl_dim_in));
OuterMin =
isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());
OuterMax =
isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());
isl_set *Extent = isl_set_universe(Array->getSpace());
Extent = isl_set_intersect(
Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));
Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));
for (unsigned i = 1; i < NumDims; ++i)
Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);
for (unsigned i = 1; i < NumDims; ++i) {
isl_pw_aff *PwAff =
const_cast<isl_pw_aff *>(Array->getDimensionSizePw(i));
isl_pw_aff *Val = isl_pw_aff_from_aff(isl_aff_var_on_domain(
isl_local_space_from_space(Array->getSpace()), isl_dim_set, i));
PwAff = isl_pw_aff_add_dims(PwAff, isl_dim_in,
isl_pw_aff_dim(Val, isl_dim_in));
PwAff = isl_pw_aff_set_tuple_id(PwAff, isl_dim_in,
isl_pw_aff_get_tuple_id(Val, isl_dim_in));
auto *Set = isl_pw_aff_gt_set(PwAff, Val);
Extent = isl_set_intersect(Set, Extent);
}
return Extent;
}
/// Derive the bounds of an array.
@ -1827,7 +1884,6 @@ public:
isl_union_map_copy(PPCGScop->tagged_must_kills);
PPCGProg->to_inner = getArrayIdentity();
PPCGProg->to_outer = getArrayIdentity();
PPCGProg->may_persist = compute_may_persist(PPCGProg);
PPCGProg->any_to_outer = nullptr;
PPCGProg->array_order = nullptr;
PPCGProg->n_stmts = std::distance(S->begin(), S->end());
@ -1838,6 +1894,8 @@ public:
createArrays(PPCGProg);
PPCGProg->may_persist = compute_may_persist(PPCGProg);
return PPCGProg;
}

View File

@ -24,6 +24,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
; CODE-NEXT: {
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_Q, MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
; CODE-NEXT: {
; CODE-NEXT: dim3 k0_dimBlock(32);
; CODE-NEXT: dim3 k0_dimGrid(16);

View File

@ -0,0 +1,42 @@
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
; RUN: -disable-output < %s | \
; RUN: FileCheck -check-prefix=CODE %s
;
; REQUIRES: pollyacc
;
; void foo(float A[], float B[]) {
; for (long i = 0; i < 1024; i++)
; A[2 * i] = B[i];
; }
; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i32), cudaMemcpyHostToDevice));
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2047) * sizeof(i32), cudaMemcpyHostToDevice));
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define void @foo(float* %A, float* %B) {
bb:
br label %bb1
bb1: ; preds = %bb8, %bb
%i.0 = phi i64 [ 0, %bb ], [ %tmp9, %bb8 ]
%exitcond = icmp ne i64 %i.0, 1024
br i1 %exitcond, label %bb2, label %bb10
bb2: ; preds = %bb1
%tmp = getelementptr inbounds float, float* %B, i64 %i.0
%tmp3 = bitcast float* %tmp to i32*
%tmp4 = load i32, i32* %tmp3, align 4
%tmp5 = shl nsw i64 %i.0, 1
%tmp6 = getelementptr inbounds float, float* %A, i64 %tmp5
%tmp7 = bitcast float* %tmp6 to i32*
store i32 %tmp4, i32* %tmp7, align 4
br label %bb8
bb8: ; preds = %bb2
%tmp9 = add nuw nsw i64 %i.0, 1
br label %bb1
bb10: ; preds = %bb1
ret void
}