diff --git a/polly/include/polly/Dependences.h b/polly/include/polly/Dependences.h index d40d58f7fa0a..9a704a855492 100755 --- a/polly/include/polly/Dependences.h +++ b/polly/include/polly/Dependences.h @@ -91,6 +91,9 @@ public: /// different kinds are 'ored' together. isl_union_map *getDependences(int Kinds); + /// @brief Report if valid dependences are available. + bool hasValidDependences(); + bool runOnScop(Scop &S); void printScop(raw_ostream &OS) const; virtual void releaseMemory(); diff --git a/polly/lib/Analysis/Dependences.cpp b/polly/lib/Analysis/Dependences.cpp index 736bccaf70ea..3b0980fbc37c 100644 --- a/polly/lib/Analysis/Dependences.cpp +++ b/polly/lib/Analysis/Dependences.cpp @@ -317,6 +317,7 @@ void Dependences::releaseMemory() { } isl_union_map *Dependences::getDependences(int Kinds) { + assert(hasValidDependences() && "No valid dependences available"); isl_space *Space = isl_union_map_get_space(RAW); isl_union_map *Deps = isl_union_map_empty(Space); @@ -334,6 +335,10 @@ isl_union_map *Dependences::getDependences(int Kinds) { return Deps; } +bool Dependences::hasValidDependences() { + return (RAW != NULL) && (WAR != NULL) && (WAW != NULL); +} + void Dependences::getAnalysisUsage(AnalysisUsage &AU) const { ScopPass::getAnalysisUsage(AU); } diff --git a/polly/lib/DeadCodeElimination.cpp b/polly/lib/DeadCodeElimination.cpp index 0aacd0064f49..8e6e1eacf8b7 100644 --- a/polly/lib/DeadCodeElimination.cpp +++ b/polly/lib/DeadCodeElimination.cpp @@ -94,9 +94,12 @@ isl_union_set *DeadCodeElim::getLastWrites(__isl_take isl_union_map *Writes, /// combine a certain number of precise steps with one approximating step that /// simplifies the life set with an affine hull. 
bool DeadCodeElim::eliminateDeadCode(Scop &S, int PreciseSteps) { - isl_union_set *Live = this->getLastWrites(S.getWrites(), S.getSchedule()); - Dependences *D = &getAnalysis<Dependences>(); + + if (!D->hasValidDependences()) + return false; + + isl_union_set *Live = this->getLastWrites(S.getWrites(), S.getSchedule()); isl_union_map *Dep = D->getDependences(Dependences::TYPE_RAW); Dep = isl_union_map_reverse(Dep); diff --git a/polly/lib/ScheduleOptimizer.cpp b/polly/lib/ScheduleOptimizer.cpp index 9e4e4ce3c649..9d4720aa84b8 100644 --- a/polly/lib/ScheduleOptimizer.cpp +++ b/polly/lib/ScheduleOptimizer.cpp @@ -431,6 +431,9 @@ isl_union_map *IslScheduleOptimizer::getScheduleMap(isl_schedule *Schedule) { bool IslScheduleOptimizer::runOnScop(Scop &S) { Dependences *D = &getAnalysis<Dependences>(); + if (!D->hasValidDependences()) + return false; + isl_schedule_free(LastSchedule); LastSchedule = NULL; diff --git a/polly/test/DeadCodeElimination/computeout.ll b/polly/test/DeadCodeElimination/computeout.ll new file mode 100644 index 000000000000..a908c4ed3142 --- /dev/null +++ b/polly/test/DeadCodeElimination/computeout.ll @@ -0,0 +1,64 @@ +; RUN: opt -S %loadPolly -basicaa -polly-dce -polly-ast -analyze < %s | FileCheck %s +; RUN: opt -S %loadPolly -basicaa -polly-dce -polly-ast -analyze -polly-dependences-computeout=1 < %s | FileCheck %s -check-prefix=TIMEOUT +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +; for(i = 0; i < 100; i++ ) +; S1: A[i] = 2; +; +; for (i = 0; i < 10; i++ ) +; S2: A[i] = 5; +; +; for (i = 0; i < 200; i++ ) +; S3: A[i] = 7; + +define void @sequential_writes() { +entry: + %A = alloca [200 x i32] + br label %S1 + +S1: + %indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %S1 ] + %arrayidx.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.1 + store i32 2, i32* %arrayidx.1 + %indvar.next.1 = add i64 %indvar.1, 1 +
%exitcond.1 = icmp ne i64 %indvar.next.1, 100 + br i1 %exitcond.1, label %S1, label %exit.1 + +exit.1: + br label %S2 + +S2: + %indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %S2 ] + %arrayidx.2 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.2 + store i32 5, i32* %arrayidx.2 + %indvar.next.2 = add i64 %indvar.2, 1 + %exitcond.2 = icmp ne i64 %indvar.next.2, 10 + br i1 %exitcond.2, label %S2, label %exit.2 + +exit.2: + br label %S3 + +S3: + %indvar.3 = phi i64 [ 0, %exit.2 ], [ %indvar.next.3, %S3 ] + %arrayidx.3 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.3 + store i32 7, i32* %arrayidx.3 + %indvar.next.3 = add i64 %indvar.3, 1 + %exitcond.3 = icmp ne i64 %indvar.next.3, 200 + br i1 %exitcond.3, label %S3 , label %exit.3 + +exit.3: + ret void +} + +; CHECK-NOT: Stmt_S +; CHECK: for (int c1 = 0; c1 <= 199; c1 += 1) +; CHECK: Stmt_S3(c1); + +; TIMEOUT: for (int c1 = 0; c1 <= 99; c1 += 1) +; TIMEOUT: Stmt_S1(c1); +; TIMEOUT: for (int c1 = 0; c1 <= 9; c1 += 1) +; TIMEOUT: Stmt_S2(c1); +; TIMEOUT: for (int c1 = 0; c1 <= 199; c1 += 1) +; TIMEOUT: Stmt_S3(c1); + diff --git a/polly/test/ScheduleOptimizer/computeout.ll b/polly/test/ScheduleOptimizer/computeout.ll new file mode 100644 index 000000000000..578478c3a113 --- /dev/null +++ b/polly/test/ScheduleOptimizer/computeout.ll @@ -0,0 +1,70 @@ +; RUN: opt -S %loadPolly -basicaa -polly-opt-isl -polly-opt-fusion=max -polly-ast -analyze < %s | FileCheck %s +; RUN: opt -S %loadPolly -basicaa -polly-opt-isl -polly-opt-fusion=max -polly-ast -analyze -polly-dependences-computeout=1 < %s | FileCheck %s -check-prefix=TIMEOUT +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" +target triple = "x86_64-pc-linux-gnu" + +; for(i = 0; i < 100; i++ ) +; S1: A[i] = 2; +; +; for (i = 0; i < 10; i++ ) +; S2: A[i] = 5; +; +; for (i = 0; i < 200; i++ ) +; S3: A[i] = 7; + +define void @sequential_writes() { +entry:
+ %A = alloca [200 x i32] + br label %S1 + +S1: + %indvar.1 = phi i64 [ 0, %entry ], [ %indvar.next.1, %S1 ] + %arrayidx.1 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.1 + store i32 2, i32* %arrayidx.1 + %indvar.next.1 = add i64 %indvar.1, 1 + %exitcond.1 = icmp ne i64 %indvar.next.1, 100 + br i1 %exitcond.1, label %S1, label %exit.1 + +exit.1: + br label %S2 + +S2: + %indvar.2 = phi i64 [ 0, %exit.1 ], [ %indvar.next.2, %S2 ] + %arrayidx.2 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.2 + store i32 5, i32* %arrayidx.2 + %indvar.next.2 = add i64 %indvar.2, 1 + %exitcond.2 = icmp ne i64 %indvar.next.2, 10 + br i1 %exitcond.2, label %S2, label %exit.2 + +exit.2: + br label %S3 + +S3: + %indvar.3 = phi i64 [ 0, %exit.2 ], [ %indvar.next.3, %S3 ] + %arrayidx.3 = getelementptr [200 x i32]* %A, i64 0, i64 %indvar.3 + store i32 7, i32* %arrayidx.3 + %indvar.next.3 = add i64 %indvar.3, 1 + %exitcond.3 = icmp ne i64 %indvar.next.3, 200 + br i1 %exitcond.3, label %S3 , label %exit.3 + +exit.3: + ret void +} + + +; CHECK: for (int c0 = 0; c0 <= 199; c0 += 1) { +; CHECK: if (c0 <= 99) { +; CHECK: Stmt_S1(c0); +; CHECK: if (c0 <= 9) +; CHECK: Stmt_S2(c0); +; CHECK: } +; CHECK: Stmt_S3(c0); +; CHECK: } + +; TIMEOUT: for (int c1 = 0; c1 <= 99; c1 += 1) +; TIMEOUT: Stmt_S1(c1); +; TIMEOUT: for (int c1 = 0; c1 <= 9; c1 += 1) +; TIMEOUT: Stmt_S2(c1); +; TIMEOUT: for (int c1 = 0; c1 <= 199; c1 += 1) +; TIMEOUT: Stmt_S3(c1); +