[ScopDetect/Info] Allow unconditional hoisting of loads from dereferenceable ptrs

In case LLVM pointers are annotated with !dereferenceable attributes/metadata
or LLVM can look at the allocation from which a pointer is derived, we can know
that dereferencing these pointers is safe and can be done unconditionally. We
use this information to prove that loads from certain pointers are safe to
hoist and then hoist them unconditionally.

llvm-svn: 297375
This commit is contained in:
Tobias Grosser 2017-03-09 11:36:00 +00:00
parent 20e588e1af
commit 8bd7f3c0a5
4 changed files with 135 additions and 11 deletions

View File

@ -53,6 +53,7 @@
#include "polly/Support/ScopLocation.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/RegionIterator.h"
#include "llvm/Analysis/ScalarEvolution.h"
@ -343,6 +344,8 @@ bool ScopDetection::addOverApproximatedRegion(Region *AR,
bool ScopDetection::onlyValidRequiredInvariantLoads(
InvariantLoadsSetTy &RequiredILS, DetectionContext &Context) const {
Region &CurRegion = Context.CurRegion;
const DataLayout &DL =
CurRegion.getEntry()->getParent()->getParent()->getDataLayout();
if (!PollyInvariantLoadHoisting && !RequiredILS.empty())
return false;
@ -351,10 +354,16 @@ bool ScopDetection::onlyValidRequiredInvariantLoads(
if (!isHoistableLoad(Load, CurRegion, *LI, *SE, *DT))
return false;
for (auto NonAffineRegion : Context.NonAffineSubRegionSet)
for (auto NonAffineRegion : Context.NonAffineSubRegionSet) {
if (isSafeToLoadUnconditionally(Load->getPointerOperand(),
Load->getAlignment(), DL))
continue;
if (NonAffineRegion->contains(Load) &&
Load->getParent() != NonAffineRegion->getEntry())
return false;
}
}
Context.RequiredILS.insert(RequiredILS.begin(), RequiredILS.end());

View File

@ -3719,6 +3719,11 @@ __isl_give isl_set *Scop::getNonHoistableCtx(MemoryAccess *Access,
if (hasNonHoistableBasePtrInScop(Access, Writes))
return nullptr;
auto &DL = getFunction().getParent()->getDataLayout();
if (isSafeToLoadUnconditionally(LI->getPointerOperand(), LI->getAlignment(),
DL))
return isl_set_empty(getParamSpace());
// Skip accesses in non-affine subregions as they might not be executed
// under the same condition as the entry of the non-affine subregion.
if (BB != LI->getParent())

View File

@ -89,15 +89,13 @@ if.end: ; preds = %if.then, %for.end
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
; Negative test. At the moment we will optimistically assume RED[0] in the conditional after the
; loop might be invariant and expand the SCoP from the loop to include the conditional. However,
; during SCoP generation we will realize that RED[0] is in fact not invariant and bail.
; At some point this was a negative test, where we optimistically assumed RED[0]
; in the conditional after the loop is invariant and expanded the SCoP from
; the loop to include the conditional. However, during SCoP generation we
; realized that RED[0] is in fact not invariant and bailed.
;
; Possible solutions could be:
; - Do not optimistically assume it to be invariant (as before this commit), however we would lose
; a lot of invariant cases due to possible aliasing.
; - Reduce the size of the SCoP if an assumed invariant access is in fact not invariant instead of
; rejecting the whole region.
; Today, LLVM can derive that the load is indeed invariant and Polly uses this
; information to unconditionally invariant load hoist RED[0].
;
; CHECK-NOT: for (int c0 = 0; c0 <= 1018; c0 += 1)
; CHECK-NOT: Stmt_for_body(c0);
; CHECK: for (int c0 = 0; c0 <= 1018; c0 += 1)
; CHECK-NEXT: Stmt_for_body(c0);

View File

@ -0,0 +1,112 @@
; RUN: opt %loadPolly -polly-detect -polly-scops \
; RUN: -polly-invariant-load-hoisting=true \
; RUN: -analyze < %s | FileCheck %s
; CHECK-NOT: Function: foo_undereferanceable
; CHECK: Function: foo_dereferanceable
; CHECK: Invariant Accesses: {
; CHECK-NEXT: ReadAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [sizeA] -> { Stmt_for_body_j__TO__for_latch_j[i0, i1] -> MemRef_sizeA_ptr[0] };
; CHECK-NEXT: Execution Context: [sizeA] -> { : }
; CHECK-NEXT: }
; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0]
; CHECK-NEXT: [sizeA] -> { Stmt_for_body_j__TO__for_latch_j[i0, i1] -> MemRef_A[i1, i0] };
; CHECK-NOT: Function: foo_undereferanceable
; Positive case: %sizeA_ptr carries the dereferenceable(8) attribute and the
; i64 load from it sits in the conditionally executed block %stmt. The
; FileCheck expectations in this file require this function to be reported
; with that load listed as an invariant access whose execution context is
; unconstrained.
; NOTE(review): %B and the %lb.* / %ub.* parameters are not used in the body.
define void @foo_dereferanceable(double* %A, double* %B, i64* dereferenceable(8) %sizeA_ptr,
i32 %lb.i, i32 %lb.j, i32 %ub.i, i32 %ub.j) {
entry:
br label %for.i
; Outer loop header: continues while %indvar.i <= 1024 (signed compare).
for.i:
%indvar.i = phi i32 [0, %entry], [%indvar.next.i, %for.latch.i]
%indvar.next.i = add i32 %indvar.i, 1
%cmp.i = icmp sle i32 %indvar.i, 1024
br i1 %cmp.i, label %for.body.i, label %exit
for.body.i:
br label %for.j
; Inner loop header: continues while %indvar.j <= 1024 (signed compare).
for.j:
%indvar.j = phi i32 [0, %for.body.i], [%indvar.next.j, %for.latch.j]
%indvar.next.j = add i32 %indvar.j, 1
%cmp.j = icmp sle i32 %indvar.j, 1024
br i1 %cmp.j, label %for.body.j, label %for.latch.i
; The branch condition depends on %indvar.j * %indvar.j, so %stmt is only
; reached when that product is <= 1024.
for.body.j:
%prod = mul i32 %indvar.j, %indvar.j
%cmp = icmp sle i32 %prod, 1024
br i1 %cmp, label %stmt, label %for.latch.j
; Conditionally executed block: loads the size through %sizeA_ptr and stores
; 42.0 to %A at element offset %indvar.i + %indvar.j * %sizeA.
stmt:
%sext.i = sext i32 %indvar.i to i64
%sext.j = sext i32 %indvar.j to i64
%sizeA = load i64, i64* %sizeA_ptr
%prodA = mul i64 %sext.j, %sizeA
%offsetA = add i64 %sext.i, %prodA
%ptrA = getelementptr double, double* %A, i64 %offsetA
store double 42.0, double* %ptrA
br label %for.latch.j
for.latch.j:
br label %for.j
for.latch.i:
br label %for.i
exit:
ret void
}
; Negative case: the same loop nest and conditional load/store, but
; %sizeA_ptr has no dereferenceable attribute. The FileCheck expectations in
; this file require that this function is not reported.
; NOTE(review): presumably the load in %stmt cannot be hoisted here because
; it executes only under the %cmp condition and nothing marks the pointer as
; safe to load unconditionally - confirm against the ScopDetection change.
; NOTE(review): %B is not used in the body.
define void @foo_undereferanceable(double* %A, double* %B, i64* %sizeA_ptr) {
entry:
br label %for.i
; Outer loop header: continues while %indvar.i <= 1024 (signed compare).
for.i:
%indvar.i = phi i32 [0, %entry], [%indvar.next.i, %for.latch.i]
%indvar.next.i = add i32 %indvar.i, 1
%cmp.i = icmp sle i32 %indvar.i, 1024
br i1 %cmp.i, label %for.body.i, label %exit
for.body.i:
br label %for.j
; Inner loop header: continues while %indvar.j <= 1024 (signed compare).
for.j:
%indvar.j = phi i32 [0, %for.body.i], [%indvar.next.j, %for.latch.j]
%indvar.next.j = add i32 %indvar.j, 1
%cmp.j = icmp sle i32 %indvar.j, 1024
br i1 %cmp.j, label %for.body.j, label %for.latch.i
; The branch condition depends on %indvar.j * %indvar.j, so %stmt is only
; reached when that product is <= 1024.
for.body.j:
%prod = mul i32 %indvar.j, %indvar.j
%cmp = icmp sle i32 %prod, 1024
br i1 %cmp, label %stmt, label %for.latch.j
; Conditionally executed block: loads the size through %sizeA_ptr and stores
; 42.0 to %A at element offset %indvar.i + %indvar.j * %sizeA.
stmt:
%sext.i = sext i32 %indvar.i to i64
%sext.j = sext i32 %indvar.j to i64
%sizeA = load i64, i64* %sizeA_ptr
%prodA = mul i64 %sext.j, %sizeA
%offsetA = add i64 %sext.i, %prodA
%ptrA = getelementptr double, double* %A, i64 %offsetA
store double 42.0, double* %ptrA
br label %for.latch.j
for.latch.j:
br label %for.j
for.latch.i:
br label %for.i
exit:
ret void
}