diff --git a/polly/include/polly/CodeGen/IslAst.h b/polly/include/polly/CodeGen/IslAst.h index 48ae3ffdca0b..059205c629ee 100644 --- a/polly/include/polly/CodeGen/IslAst.h +++ b/polly/include/polly/CodeGen/IslAst.h @@ -103,6 +103,10 @@ public: /// Cleanup all isl structs on destruction. ~IslAstUserPayload(); + /// Does the dependence analysis determine that there are no loop-carried + /// dependencies? + bool IsParallel = false; + /// Flag to mark innermost loops. bool IsInnermost = false; @@ -116,7 +120,7 @@ public: bool IsReductionParallel = false; /// The minimal dependence distance for non parallel loops. - isl_pw_aff *MinimalDependenceDistance = nullptr; + isl::pw_aff MinimalDependenceDistance; /// The build environment at the time this node was constructed. isl_ast_build *Build = nullptr; diff --git a/polly/lib/CodeGen/IslAst.cpp b/polly/lib/CodeGen/IslAst.cpp index 0923726c87bc..1862950ed67d 100644 --- a/polly/lib/CodeGen/IslAst.cpp +++ b/polly/lib/CodeGen/IslAst.cpp @@ -119,6 +119,9 @@ struct AstBuildUserInfo { /// Flag to indicate that we are inside a parallel for node. bool InParallelFor = false; + /// Flag to indicate that we are inside an SIMD node. + bool InSIMD = false; + /// The last iterator id created for the current SCoP. isl_id *LastForNodeId = nullptr; }; @@ -131,7 +134,6 @@ static void freeIslAstUserPayload(void *Ptr) { IslAstInfo::IslAstUserPayload::~IslAstUserPayload() { isl_ast_build_free(Build); - isl_pw_aff_free(MinimalDependenceDistance); } /// Print a string @p str in a single line using @p Printer. @@ -226,7 +228,10 @@ static bool astScheduleDimIsParallel(__isl_keep isl_ast_build *Build, D->getDependences(Dependences::TYPE_RAW | Dependences::TYPE_WAW | Dependences::TYPE_WAR | Dependences::TYPE_TC_RED) .release(); - D->isParallel(Schedule, DepsAll, &NodeInfo->MinimalDependenceDistance); + isl_pw_aff *MinimalDependenceDistance = nullptr; + D->isParallel(Schedule, DepsAll, &MinimalDependenceDistance); + NodeInfo->MinimalDependenceDistance = + isl::manage(MinimalDependenceDistance); isl_union_map_free(Schedule); return false; } @@ -268,10 +273,13 @@ static __isl_give isl_id *astBuildBeforeFor(__isl_keep isl_ast_build *Build, Id = isl_id_set_free_user(Id, freeIslAstUserPayload); BuildInfo->LastForNodeId = Id; + Payload->IsParallel = + astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); + // Test for parallelism only if we are not already inside a parallel loop - if (!BuildInfo->InParallelFor) + if (!BuildInfo->InParallelFor && !BuildInfo->InSIMD) BuildInfo->InParallelFor = Payload->IsOutermostParallel = - astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); + Payload->IsParallel; return Id; } @@ -296,18 +304,8 @@ astBuildAfterFor(__isl_take isl_ast_node *Node, __isl_keep isl_ast_build *Build, Payload->Build = isl_ast_build_copy(Build); Payload->IsInnermost = (Id == BuildInfo->LastForNodeId); - // Innermost loops that are surrounded by parallel loops have not yet been - // tested for parallelism. Test them here to ensure we check all innermost - // loops for parallelism. - if (Payload->IsInnermost && BuildInfo->InParallelFor) { - if (Payload->IsOutermostParallel) { - Payload->IsInnermostParallel = true; - } else { - if (PollyVectorizerChoice == VECTORIZER_NONE) - Payload->IsInnermostParallel = - astScheduleDimIsParallel(Build, BuildInfo->Deps, Payload); - } - } + Payload->IsInnermostParallel = + Payload->IsInnermost && (BuildInfo->InSIMD || Payload->IsParallel); if (Payload->IsOutermostParallel) BuildInfo->InParallelFor = false; @@ -323,7 +321,7 @@ static isl_stat astBuildBeforeMark(__isl_keep isl_id *MarkId, AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User; if (strcmp(isl_id_get_name(MarkId), "SIMD") == 0) - BuildInfo->InParallelFor = true; + BuildInfo->InSIMD = true; return isl_stat_ok; } @@ -335,7 +333,7 @@ astBuildAfterMark(__isl_take isl_ast_node *Node, AstBuildUserInfo *BuildInfo = (AstBuildUserInfo *)User; auto *Id = isl_ast_node_mark_get_id(Node); if (strcmp(isl_id_get_name(Id), "SIMD") == 0) - BuildInfo->InParallelFor = false; + BuildInfo->InSIMD = false; isl_id_free(Id); return Node; } @@ -565,6 +563,7 @@ void IslAst::init(const Dependences &D) { if (PerformParallelTest) { BuildInfo.Deps = &D; BuildInfo.InParallelFor = false; + BuildInfo.InSIMD = false; Build = isl_ast_build_set_before_each_for(Build, &astBuildBeforeFor, &BuildInfo); @@ -664,8 +663,7 @@ IslAstInfo::getSchedule(__isl_keep isl_ast_node *Node) { __isl_give isl_pw_aff * IslAstInfo::getMinimalDependenceDistance(__isl_keep isl_ast_node *Node) { IslAstUserPayload *Payload = getNodePayload(Node); - return Payload ? isl_pw_aff_copy(Payload->MinimalDependenceDistance) - : nullptr; + return Payload ? Payload->MinimalDependenceDistance.copy() : nullptr; } IslAstInfo::MemoryAccessSet * diff --git a/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll b/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll new file mode 100644 index 000000000000..a5d65c81caf8 --- /dev/null +++ b/polly/test/ScheduleOptimizer/SIMDInParallelFor.ll @@ -0,0 +1,65 @@ +; RUN: opt %loadPolly -polly-parallel -polly-vectorizer=stripmine -polly-codegen-verify -polly-opt-isl -polly-ast -polly-codegen -analyze < %s | FileCheck %s +; +; Check that there are no nested #pragma omp parallel for inside a +; #pragma omp parallel for loop. +; See llvm.org/PR38073 and llvm.org/PR33153 +; +; This test unfortunately is very dependent on the result of the schedule +; optimizer (-polly-opt-isl). +; +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" + +@b = external dso_local unnamed_addr global [1984 x [1984 x double]], align 16 +@c = external dso_local unnamed_addr global [1984 x [1984 x double]], align 16 + +define dso_local void @main() local_unnamed_addr { +entry: + %cond = select i1 undef, i32 undef, i32 1984 + %tmp = zext i32 %cond to i64 + %cond63 = select i1 undef, i32 undef, i32 1984 + %tmp1 = zext i32 %cond63 to i64 + br label %for.cond51.preheader + +for.cond51.preheader: + %indvars.iv213 = phi i64 [ 0, %entry ], [ %indvars.iv.next214, %for.inc98 ] + %cond73 = select i1 undef, i32 undef, i32 1984 + %tmp2 = zext i32 %cond73 to i64 + br label %for.cond56.preheader + +for.cond56.preheader: + %indvars.iv223 = phi i64 [ 0, %for.cond51.preheader ], [ %indvars.iv.next224, %for.inc95 ] + br label %for.cond66.preheader + +for.cond66.preheader: + %indvars.iv219 = phi i64 [ %indvars.iv.next220, %for.inc92 ], [ 0, %for.cond56.preheader ] + br label %for.body75 + +for.body75: + %indvars.iv215 = phi i64 [ %indvars.iv213, %for.cond66.preheader ], [ %indvars.iv.next216, %for.body75 ] + %arrayidx83 = getelementptr inbounds [1984 x [1984 x double]], [1984 x [1984 x double]]* @b, i64 0, i64 %indvars.iv219, i64 %indvars.iv215 + %tmp3 = load double, double* %arrayidx83, align 8 + %arrayidx87 = getelementptr inbounds [1984 x [1984 x double]], [1984 x [1984 x double]]* @c, i64 0, i64 %indvars.iv223, i64 %indvars.iv215 + store double undef, double* %arrayidx87, align 8 + %indvars.iv.next216 = add nuw nsw i64 %indvars.iv215, 1 + %cmp74 = icmp ult i64 %indvars.iv.next216, %tmp2 + br i1 %cmp74, label %for.body75, label %for.inc92 + +for.inc92: + %indvars.iv.next220 = add nuw nsw i64 %indvars.iv219, 1 + %cmp64 = icmp ult i64 %indvars.iv.next220, %tmp1 + br i1 %cmp64, label %for.cond66.preheader, label %for.inc95 + +for.inc95: + %indvars.iv.next224 = add nuw nsw i64 %indvars.iv223, 1 + %cmp54 = icmp ult i64 %indvars.iv.next224, %tmp + br i1 %cmp54, label %for.cond56.preheader, label %for.inc98 + +for.inc98: + %indvars.iv.next214 = add nuw nsw i64 %indvars.iv213, 48 + br label %for.cond51.preheader +} + +; No parallel loop except the to outermost. +; CHECK: #pragma omp parallel for +; CHECK: #pragma omp parallel for +; CHECK-NOT: #pragma omp parallel for diff --git a/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll b/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll index ede09e7c3fb1..4a0c33152922 100644 --- a/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll +++ b/polly/test/ScheduleOptimizer/full_partial_tile_separation.ll @@ -5,12 +5,15 @@ ; CHECK-NEXT: #pragma known-parallel ; CHECK-NEXT: for (int c0 = 0; c0 <= floord(ni - 1, 32); c0 += 1) ; CHECK-NEXT: for (int c1 = 0; c1 <= floord(nj - 1, 32); c1 += 1) +; CHECK-NEXT: #pragma minimal dependence distance: 1 ; CHECK-NEXT: for (int c2 = 0; c2 <= floord(nk - 1, 32); c2 += 1) { ; CHECK-NEXT: // 1st level tiling - Points ; CHECK-NEXT: for (int c3 = 0; c3 <= min(31, ni - 32 * c0 - 1); c3 += 1) { ; CHECK-NEXT: for (int c4 = 0; c4 <= min(7, -8 * c1 + nj / 4 - 1); c4 += 1) +; CHECK-NEXT: #pragma minimal dependence distance: 1 ; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) { ; CHECK-NEXT: // SIMD +; CHECK-NEXT: #pragma simd ; CHECK-NEXT: for (int c6 = 0; c6 <= 3; c6 += 1) ; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, 32 * c1 + 4 * c4 + c6, 32 * c2 + c5); ; CHECK-NEXT: } @@ -18,6 +21,7 @@ ; CHECK-NEXT: #pragma minimal dependence distance: 1 ; CHECK-NEXT: for (int c5 = 0; c5 <= min(31, nk - 32 * c2 - 1); c5 += 1) { ; CHECK-NEXT: // SIMD +; CHECK-NEXT: #pragma simd ; CHECK-NEXT: for (int c6 = 0; c6 < nj % 4; c6 += 1) ; CHECK-NEXT: Stmt_for_body_6(32 * c0 + c3, -(nj % 4) + nj + c6, 32 * c2 + c5); ; CHECK-NEXT: }