mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2024-12-27 02:09:54 +00:00
[GPGPU] Ensure arrays where only parts are modified are copied to GPU
To do so we change the way array exents are computed. Instead of the precise set of memory locations accessed, we now compute the extent as the range between minimal and maximal address in the first dimension and the full extent defined by the sizes of the inner array dimensions. We also move the computation of the may_persist region after the construction of the arrays, as it relies on array information. Without arrays being constructed no useful information is computed at all. llvm-svn: 278212
This commit is contained in:
parent
85c7ea86ae
commit
d58acf866a
@ -1700,21 +1700,78 @@ public:
|
||||
|
||||
/// Derive the extent of an array.
|
||||
///
|
||||
/// The extent of an array is defined by the set of memory locations for
|
||||
/// which a memory access in the iteration domain exists.
|
||||
/// The extent of an array is the set of elements that are within the
|
||||
/// accessed array. For the inner dimensions, the extent constraints are
|
||||
/// 0 and the size of the corresponding array dimension. For the first
|
||||
/// (outermost) dimension, the extent constraints are the minimal and maximal
|
||||
/// subscript value for the first dimension.
|
||||
///
|
||||
/// @param Array The array to derive the extent for.
|
||||
///
|
||||
/// @returns An isl_set describing the extent of the array.
|
||||
__isl_give isl_set *getExtent(ScopArrayInfo *Array) {
|
||||
unsigned NumDims = Array->getNumberOfDimensions();
|
||||
isl_union_map *Accesses = S->getAccesses();
|
||||
Accesses = isl_union_map_intersect_domain(Accesses, S->getDomains());
|
||||
Accesses = isl_union_map_detect_equalities(Accesses);
|
||||
isl_union_set *AccessUSet = isl_union_map_range(Accesses);
|
||||
AccessUSet = isl_union_set_coalesce(AccessUSet);
|
||||
AccessUSet = isl_union_set_detect_equalities(AccessUSet);
|
||||
AccessUSet = isl_union_set_coalesce(AccessUSet);
|
||||
|
||||
if (isl_union_set_is_empty(AccessUSet)) {
|
||||
isl_union_set_free(AccessUSet);
|
||||
return isl_set_empty(Array->getSpace());
|
||||
}
|
||||
|
||||
if (Array->getNumberOfDimensions() == 0) {
|
||||
isl_union_set_free(AccessUSet);
|
||||
return isl_set_universe(Array->getSpace());
|
||||
}
|
||||
|
||||
isl_set *AccessSet =
|
||||
isl_union_set_extract_set(AccessUSet, Array->getSpace());
|
||||
isl_union_set_free(AccessUSet);
|
||||
|
||||
return AccessSet;
|
||||
isl_union_set_free(AccessUSet);
|
||||
isl_local_space *LS = isl_local_space_from_space(Array->getSpace());
|
||||
|
||||
isl_pw_aff *Val =
|
||||
isl_pw_aff_from_aff(isl_aff_var_on_domain(LS, isl_dim_set, 0));
|
||||
|
||||
isl_pw_aff *OuterMin = isl_set_dim_min(isl_set_copy(AccessSet), 0);
|
||||
isl_pw_aff *OuterMax = isl_set_dim_max(AccessSet, 0);
|
||||
OuterMin = isl_pw_aff_add_dims(OuterMin, isl_dim_in,
|
||||
isl_pw_aff_dim(Val, isl_dim_in));
|
||||
OuterMax = isl_pw_aff_add_dims(OuterMax, isl_dim_in,
|
||||
isl_pw_aff_dim(Val, isl_dim_in));
|
||||
OuterMin =
|
||||
isl_pw_aff_set_tuple_id(OuterMin, isl_dim_in, Array->getBasePtrId());
|
||||
OuterMax =
|
||||
isl_pw_aff_set_tuple_id(OuterMax, isl_dim_in, Array->getBasePtrId());
|
||||
|
||||
isl_set *Extent = isl_set_universe(Array->getSpace());
|
||||
|
||||
Extent = isl_set_intersect(
|
||||
Extent, isl_pw_aff_le_set(OuterMin, isl_pw_aff_copy(Val)));
|
||||
Extent = isl_set_intersect(Extent, isl_pw_aff_ge_set(OuterMax, Val));
|
||||
|
||||
for (unsigned i = 1; i < NumDims; ++i)
|
||||
Extent = isl_set_lower_bound_si(Extent, isl_dim_set, i, 0);
|
||||
|
||||
for (unsigned i = 1; i < NumDims; ++i) {
|
||||
isl_pw_aff *PwAff =
|
||||
const_cast<isl_pw_aff *>(Array->getDimensionSizePw(i));
|
||||
isl_pw_aff *Val = isl_pw_aff_from_aff(isl_aff_var_on_domain(
|
||||
isl_local_space_from_space(Array->getSpace()), isl_dim_set, i));
|
||||
PwAff = isl_pw_aff_add_dims(PwAff, isl_dim_in,
|
||||
isl_pw_aff_dim(Val, isl_dim_in));
|
||||
PwAff = isl_pw_aff_set_tuple_id(PwAff, isl_dim_in,
|
||||
isl_pw_aff_get_tuple_id(Val, isl_dim_in));
|
||||
auto *Set = isl_pw_aff_gt_set(PwAff, Val);
|
||||
Extent = isl_set_intersect(Set, Extent);
|
||||
}
|
||||
|
||||
return Extent;
|
||||
}
|
||||
|
||||
/// Derive the bounds of an array.
|
||||
@ -1827,7 +1884,6 @@ public:
|
||||
isl_union_map_copy(PPCGScop->tagged_must_kills);
|
||||
PPCGProg->to_inner = getArrayIdentity();
|
||||
PPCGProg->to_outer = getArrayIdentity();
|
||||
PPCGProg->may_persist = compute_may_persist(PPCGProg);
|
||||
PPCGProg->any_to_outer = nullptr;
|
||||
PPCGProg->array_order = nullptr;
|
||||
PPCGProg->n_stmts = std::distance(S->begin(), S->end());
|
||||
@ -1838,6 +1894,8 @@ public:
|
||||
|
||||
createArrays(PPCGProg);
|
||||
|
||||
PPCGProg->may_persist = compute_may_persist(PPCGProg);
|
||||
|
||||
return PPCGProg;
|
||||
}
|
||||
|
||||
|
@ -24,6 +24,7 @@ declare void @llvm.lifetime.start(i64, i8* nocapture) #0
|
||||
; CODE-NEXT: {
|
||||
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
|
||||
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_R, MemRef_R, (p_0 + 1) * (512) * sizeof(double), cudaMemcpyHostToDevice));
|
||||
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_Q, MemRef_Q, (512) * (512) * sizeof(double), cudaMemcpyHostToDevice));
|
||||
; CODE-NEXT: {
|
||||
; CODE-NEXT: dim3 k0_dimBlock(32);
|
||||
; CODE-NEXT: dim3 k0_dimGrid(16);
|
||||
|
42
polly/test/GPGPU/only-part-of-array-modified.ll
Normal file
42
polly/test/GPGPU/only-part-of-array-modified.ll
Normal file
@ -0,0 +1,42 @@
|
||||
; RUN: opt %loadPolly -polly-codegen-ppcg -polly-acc-dump-code \
|
||||
; RUN: -disable-output < %s | \
|
||||
; RUN: FileCheck -check-prefix=CODE %s
|
||||
;
|
||||
; REQUIRES: pollyacc
|
||||
;
|
||||
; void foo(float A[], float B[]) {
|
||||
; for (long i = 0; i < 1024; i++)
|
||||
; A[2 * i] = B[i];
|
||||
; }
|
||||
|
||||
; CODE: cudaCheckReturn(cudaMemcpy(dev_MemRef_B, MemRef_B, (1024) * sizeof(i32), cudaMemcpyHostToDevice));
|
||||
; CODE-NEXT: cudaCheckReturn(cudaMemcpy(dev_MemRef_A, MemRef_A, (2047) * sizeof(i32), cudaMemcpyHostToDevice));
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
define void @foo(float* %A, float* %B) {
|
||||
bb:
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb8, %bb
|
||||
%i.0 = phi i64 [ 0, %bb ], [ %tmp9, %bb8 ]
|
||||
%exitcond = icmp ne i64 %i.0, 1024
|
||||
br i1 %exitcond, label %bb2, label %bb10
|
||||
|
||||
bb2: ; preds = %bb1
|
||||
%tmp = getelementptr inbounds float, float* %B, i64 %i.0
|
||||
%tmp3 = bitcast float* %tmp to i32*
|
||||
%tmp4 = load i32, i32* %tmp3, align 4
|
||||
%tmp5 = shl nsw i64 %i.0, 1
|
||||
%tmp6 = getelementptr inbounds float, float* %A, i64 %tmp5
|
||||
%tmp7 = bitcast float* %tmp6 to i32*
|
||||
store i32 %tmp4, i32* %tmp7, align 4
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb2
|
||||
%tmp9 = add nuw nsw i64 %i.0, 1
|
||||
br label %bb1
|
||||
|
||||
bb10: ; preds = %bb1
|
||||
ret void
|
||||
}
|
Loading…
Reference in New Issue
Block a user