diff --git a/polly/lib/Analysis/ScopDetection.cpp b/polly/lib/Analysis/ScopDetection.cpp index 909c114e6c41..44b17824981d 100644 --- a/polly/lib/Analysis/ScopDetection.cpp +++ b/polly/lib/Analysis/ScopDetection.cpp @@ -53,6 +53,7 @@ #include "polly/Support/ScopLocation.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/Loads.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/ScalarEvolution.h" @@ -343,6 +344,8 @@ bool ScopDetection::addOverApproximatedRegion(Region *AR, bool ScopDetection::onlyValidRequiredInvariantLoads( InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const { Region &CurRegion = Context.CurRegion; + const DataLayout &DL = + CurRegion.getEntry()->getParent()->getParent()->getDataLayout(); if (!PollyInvariantLoadHoisting && !RequiredILS.empty()) return false; @@ -351,10 +354,16 @@ bool ScopDetection::onlyValidRequiredInvariantLoads( if (!isHoistableLoad(Load, CurRegion, *LI, *SE, *DT)) return false; - for (auto NonAffineRegion : Context.NonAffineSubRegionSet) + for (auto NonAffineRegion : Context.NonAffineSubRegionSet) { + + if (isSafeToLoadUnconditionally(Load->getPointerOperand(), + Load->getAlignment(), DL)) + continue; + if (NonAffineRegion->contains(Load) && Load->getParent() != NonAffineRegion->getEntry()) return false; + } } Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end()); diff --git a/polly/lib/Analysis/ScopInfo.cpp b/polly/lib/Analysis/ScopInfo.cpp index 0a8b4bb4f996..574428c66232 100644 --- a/polly/lib/Analysis/ScopInfo.cpp +++ b/polly/lib/Analysis/ScopInfo.cpp @@ -3719,6 +3719,11 @@ __isl_give isl_set *Scop::getNonHoistableCtx(MemoryAccess *Access, if (hasNonHoistableBasePtrInScop(Access, Writes)) return nullptr; + auto &DL = getFunction().getParent()->getDataLayout(); + if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(), + DL)) + return isl_set_empty(getParamSpace()); + // Skip 
accesses in non-affine subregions as they might not be executed // under the same condition as the entry of the non-affine subregion. if (BB != LI->getParent()) diff --git a/polly/test/Isl/CodeGen/reduction_2.ll b/polly/test/Isl/CodeGen/reduction_2.ll index 3ee742ae2cdd..886d7a7c57ac 100644 --- a/polly/test/Isl/CodeGen/reduction_2.ll +++ b/polly/test/Isl/CodeGen/reduction_2.ll @@ -89,15 +89,13 @@ if.end: ; preds = %if.then, %for.end declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind -; Negative test. At the moment we will optimistically assume RED[0] in the conditional after the -; loop might be invariant and expand the SCoP from the loop to include the conditional. However, -; during SCoP generation we will realize that RED[0] is in fact not invariant and bail. +; At some point this was a negative test, where we optimistically assumed RED[0] +; in the conditional after the loop is invariant and expanded the SCoP from +; the loop to include the conditional. However, during SCoP generation we +; realized that RED[0] is in fact not invariant and bailed. ; -; Possible solutions could be: -; - Do not optimistically assume it to be invariant (as before this commit), however we would loose -; a lot of invariant cases due to possible aliasing. -; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of -; rejecting the whole region. +; Today, LLVM can derive that the load is indeed invariant and Polly uses this +; information to hoist the invariant load of RED[0] unconditionally. 
; -; CHECK-NOT: for (int c0 = 0; c0 <= 1018; c0 += 1) -; CHECK-NOT: Stmt_for_body(c0); +; CHECK: for (int c0 = 0; c0 <= 1018; c0 += 1) +; CHECK-NEXT: Stmt_for_body(c0); diff --git a/polly/test/ScopInfo/invariant_load_dereferenceable.ll b/polly/test/ScopInfo/invariant_load_dereferenceable.ll new file mode 100644 index 000000000000..420bbfed2544 --- /dev/null +++ b/polly/test/ScopInfo/invariant_load_dereferenceable.ll @@ -0,0 +1,112 @@ +; RUN: opt %loadPolly -polly-detect -polly-scops \ +; RUN: -polly-invariant-load-hoisting=true \ +; RUN: -analyze < %s | FileCheck %s + +; CHECK-NOT: Function: foo_undereferanceable + +; CHECK: Function: foo_dereferanceable + +; CHECK: Invariant Accesses: { +; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [sizeA] -> { Stmt_for_body_j__TO__for_latch_j[i0, i1] -> MemRef_sizeA_ptr[0] }; +; CHECK-NEXT: Execution Context: [sizeA] -> { : } +; CHECK-NEXT: } + +; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0] +; CHECK-NEXT: [sizeA] -> { Stmt_for_body_j__TO__for_latch_j[i0, i1] -> MemRef_A[i1, i0] }; + +; CHECK-NOT: Function: foo_undereferanceable + +define void @foo_dereferanceable(double* %A, double* %B, i64* dereferenceable(8) %sizeA_ptr, + i32 %lb.i, i32 %lb.j, i32 %ub.i, i32 %ub.j) { +entry: + br label %for.i + +for.i: + %indvar.i = phi i32 [0, %entry], [%indvar.next.i, %for.latch.i] + %indvar.next.i = add i32 %indvar.i, 1 + %cmp.i = icmp sle i32 %indvar.i, 1024 + br i1 %cmp.i, label %for.body.i, label %exit + +for.body.i: + br label %for.j + +for.j: + %indvar.j = phi i32 [0, %for.body.i], [%indvar.next.j, %for.latch.j] + %indvar.next.j = add i32 %indvar.j, 1 + %cmp.j = icmp sle i32 %indvar.j, 1024 + br i1 %cmp.j, label %for.body.j, label %for.latch.i + +for.body.j: + %prod = mul i32 %indvar.j, %indvar.j + %cmp = icmp sle i32 %prod, 1024 + br i1 %cmp, label %stmt, label %for.latch.j + +stmt: + %sext.i = sext i32 %indvar.i to i64 + %sext.j = sext i32 %indvar.j to i64 + + %sizeA = load i64, i64* 
%sizeA_ptr + %prodA = mul i64 %sext.j, %sizeA + %offsetA = add i64 %sext.i, %prodA + %ptrA = getelementptr double, double* %A, i64 %offsetA + store double 42.0, double* %ptrA + + br label %for.latch.j + +for.latch.j: + br label %for.j + +for.latch.i: + br label %for.i + +exit: + ret void +} + +define void @foo_undereferanceable(double* %A, double* %B, i64* %sizeA_ptr) { +entry: + br label %for.i + +for.i: + %indvar.i = phi i32 [0, %entry], [%indvar.next.i, %for.latch.i] + %indvar.next.i = add i32 %indvar.i, 1 + %cmp.i = icmp sle i32 %indvar.i, 1024 + br i1 %cmp.i, label %for.body.i, label %exit + +for.body.i: + br label %for.j + +for.j: + %indvar.j = phi i32 [0, %for.body.i], [%indvar.next.j, %for.latch.j] + %indvar.next.j = add i32 %indvar.j, 1 + %cmp.j = icmp sle i32 %indvar.j, 1024 + br i1 %cmp.j, label %for.body.j, label %for.latch.i + +for.body.j: + %prod = mul i32 %indvar.j, %indvar.j + %cmp = icmp sle i32 %prod, 1024 + br i1 %cmp, label %stmt, label %for.latch.j + +stmt: + %sext.i = sext i32 %indvar.i to i64 + %sext.j = sext i32 %indvar.j to i64 + + %sizeA = load i64, i64* %sizeA_ptr + %prodA = mul i64 %sext.j, %sizeA + %offsetA = add i64 %sext.i, %prodA + %ptrA = getelementptr double, double* %A, i64 %offsetA + store double 42.0, double* %ptrA + + br label %for.latch.j + +for.latch.j: + br label %for.j + +for.latch.i: + br label %for.i + +exit: + ret void +} +