Allow multiple reductions per statement

Iterate over all store memory accesses and check for valid binary reduction
  candidate loads by following the operands of the stored value.  For each
  candidate pair we check if they have the same base address and there are no
  other accesses which may overlap with them. This ensures that no intermediate
  value can escape into other memory locations or is overwritten at some point.

  + 17 test cases for reduction detection and reduction dependency modeling

llvm-svn: 211957
This commit is contained in:
Johannes Doerfert 2014-06-27 20:31:28 +00:00
parent e1c1138a38
commit e58a012094
22 changed files with 1305 additions and 24 deletions

View File

@ -324,7 +324,14 @@ class ScopStmt {
__isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion);
void buildScattering(SmallVectorImpl<unsigned> &Scatter);
void buildAccesses(TempScop &tempScop, const Region &CurRegion);
void checkForReduction();
/// @brief Detect and mark reductions in the ScopStmt
void checkForReductions();
/// @brief Collect loads which might form a reduction chain with @p StoreMA
void
collectCandiateReductionLoads(MemoryAccess *StoreMA,
llvm::SmallVectorImpl<MemoryAccess *> &Loads);
//@}
/// Create the ScopStmt from a BasicBlock.

View File

@ -308,7 +308,6 @@ void Dependences::calculateDependences(Scop &S) {
isl_map *Identity =
isl_map_from_domain_and_range(isl_set_copy(AccDomW), AccDomW);
RED = isl_union_map_add_map(RED, Identity);
break;
}
}

View File

@ -706,45 +706,125 @@ ScopStmt::ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion,
Domain = buildDomain(tempScop, CurRegion);
buildScattering(Scatter);
buildAccesses(tempScop, CurRegion);
checkForReduction();
checkForReductions();
}
void ScopStmt::checkForReduction() {
// Skip statements with more than one binary reduction
if (MemAccs.size() != 2)
return;
// Skip if there is not exactly one load and one store
unsigned LoadIdx = MemAccs[0]->isRead() ? 0 : 1;
auto *Load = dyn_cast<LoadInst>(MemAccs[LoadIdx]->getAccessInstruction());
auto *Store =
dyn_cast<StoreInst>(MemAccs[1 - LoadIdx]->getAccessInstruction());
if (!Load || !Store ||
Load->getPointerOperand() != Store->getPointerOperand())
/// @brief Collect loads which might form a reduction chain with @p StoreMA
///
/// Check if the stored value for @p StoreMA is a binary operator with one or
/// two loads as operands. If the binary operand is commutative & associative,
/// used only once (by @p StoreMA) and its load operands are also used only
/// once, we have found a possible reduction chain. It starts at an operand
/// load and includes the binary operator and @p StoreMA.
///
/// Note: We allow only one use to ensure the load and binary operator cannot
/// escape this block or into any other store except @p StoreMA.
void ScopStmt::collectCandiateReductionLoads(
MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
if (!Store)
return;
// Skip if there is not one binary operator between the load and the store
auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
if (!BinOp || (BinOp->getOperand(0) != Load && BinOp->getOperand(1) != Load))
if (!BinOp)
return;
// Skip if the binary operators has multiple uses
if (BinOp->getNumUses() != 1)
return;
// Skip if the opcode of the binary operator is not commutative/associative
if (!BinOp->isCommutative() || !BinOp->isAssociative())
return;
// Skip if the load has multiple uses
if (Load->getNumUses() != 1)
return;
// Skip if it is a multiplicative reduction and we disabled them
if (DisableMultiplicativeReductions &&
(BinOp->getOpcode() == Instruction::Mul ||
BinOp->getOpcode() == Instruction::FMul))
return;
// Valid reduction like access
MemAccs[0]->markReductionLike();
MemAccs[1]->markReductionLike();
// Check the binary operator operands for a candidate load
auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
if (!PossibleLoad0 && !PossibleLoad1)
return;
// A load is only a candidate if it cannot escape (thus has only this use)
if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
Loads.push_back(lookupAccessFor(PossibleLoad0));
if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
Loads.push_back(lookupAccessFor(PossibleLoad1));
}
/// @brief Check for reductions in this ScopStmt
///
/// Iterate over all store memory accesses and check for valid binary reduction
/// like chains. For all candidates we check if they have the same base address
/// and there are no other accesses which overlap with them. The base address
/// check rules out impossible reductions candidates early. The overlap check,
/// together with the "only one user" check in collectCandiateReductionLoads,
/// guarantees that none of the intermediate results will escape during
/// execution of the loop nest. We basically check here that no other memory
/// access can access the same memory as the potential reduction.
void ScopStmt::checkForReductions() {
SmallVector<MemoryAccess *, 2> Loads;
SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
// First collect candidate load-store reduction chains by iterating over all
// stores and collecting possible reduction loads.
for (MemoryAccess *StoreMA : MemAccs) {
if (StoreMA->isRead())
continue;
Loads.clear();
collectCandiateReductionLoads(StoreMA, Loads);
for (MemoryAccess *LoadMA : Loads)
Candidates.push_back(std::make_pair(LoadMA, StoreMA));
}
// Then check each possible candidate pair.
for (const auto &CandidatePair : Candidates) {
bool Valid = true;
isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
isl_map *StoreAccs = CandidatePair.second->getAccessRelation();
// Skip those with obviously unequal base addresses.
if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
isl_map_free(LoadAccs);
isl_map_free(StoreAccs);
continue;
}
// And check if the remaining for overlap with other memory accesses.
isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
isl_set *AllAccs = isl_map_range(AllAccsRel);
for (MemoryAccess *MA : MemAccs) {
if (MA == CandidatePair.first || MA == CandidatePair.second)
continue;
isl_map *AccRel =
isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
isl_set *Accs = isl_map_range(AccRel);
if (isl_set_has_equal_space(AllAccs, Accs) || isl_set_free(Accs)) {
isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
Valid = Valid && isl_set_is_empty(OverlapAccs);
isl_set_free(OverlapAccs);
}
}
isl_set_free(AllAccs);
if (!Valid)
continue;
// If no overlapping access was found we mark the load and store as
// reduction like.
CandidatePair.first->markReductionLike();
CandidatePair.second->markReductionLike();
}
}
std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }

View File

@ -0,0 +1,59 @@
; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s
;
; CHECK: RAW dependences:
; CHECK: { }
; CHECK: WAR dependences:
; CHECK: { }
; CHECK: WAW dependences:
; CHECK: { }
; CHECK: Reduction dependences:
; CHECK: { Stmt_for_body3[i0, i1] -> Stmt_for_body3[2 + i0, -1 + i1] : i0 <= 97 and i0 >= 0 and i1 <= 99 and i1 >= 1 }
;
; void f(int *sum) {
; for (int i = 0; i < 100; i++)
; for (int j = 0; j < 100; j++)
; sum[i+j*2] += j * i;
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* %sum) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
%exitcond1 = icmp ne i32 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.end8
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 100
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%mul = mul nsw i32 %j.0, %i.0
%mul4 = shl nsw i32 %j.0, 1
%add = add nsw i32 %i.0, %mul4
%arrayidx = getelementptr inbounds i32* %sum, i32 %add
%tmp = load i32* %arrayidx, align 4
%add5 = add nsw i32 %tmp, %mul
store i32 %add5, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc6
for.inc6: ; preds = %for.end
%inc7 = add nsw i32 %i.0, 1
br label %for.cond
for.end8: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,60 @@
; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s
;
; CHECK: RAW dependences:
; CHECK-DAG: Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, o1] : i0 <= 1022 and i0 >= 0 and o1 <= 511 and o1 >= 1
; CHECK-DAG: Stmt_for_body3[i0, 0] -> Stmt_for_body3[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 511 and o1 >= 1
; CHECK: WAR dependences:
; CHECK: { }
; CHECK: WAW dependences:
; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2
; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0
; CHECK: Reduction dependences:
; CHECK: { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 }
;
; void f(int *sum) {
; for (int i = 0; i < 1024; i++)
; for (int j = 0; j < 512; j++)
; sum[i + j] = sum[i] + 3;
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* %sum) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end8
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 512
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %sum, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, 3
%add4 = add nsw i32 %i.0, %j.0
%arrayidx5 = getelementptr inbounds i32* %sum, i32 %add4
store i32 %add, i32* %arrayidx5, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc6
for.inc6: ; preds = %for.end
%inc7 = add nsw i32 %i.0, 1
br label %for.cond
for.end8: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,77 @@
; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s
;
; Verify that only the inner reduction like accesses cause reduction dependences
;
; CHECK: Reduction dependences:
; CHECK: { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 }
;
; void f(int * restrict A, int * restrict sum) {
; int i, j, k;
; for (i = 0; i < 100; i++) {
; *sum *= 7;
; for (j = 0; j < 100; j++) {
; *sum += A[i+j];
; for (k = 0; k< 100; k++) {}
; }
; }
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* noalias %A, i32* noalias %sum) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc11, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
%exitcond2 = icmp ne i32 %i.0, 100
br i1 %exitcond2, label %for.body, label %for.end13
for.body: ; preds = %for.cond
%tmp = load i32* %sum, align 4
%mul = mul nsw i32 %tmp, 7
store i32 %mul, i32* %sum, align 4
br label %for.cond1
for.cond1: ; preds = %for.inc8, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ]
%exitcond1 = icmp ne i32 %j.0, 100
br i1 %exitcond1, label %for.body3, label %for.end10
for.body3: ; preds = %for.cond1
%add = add nsw i32 %i.0, %j.0
%arrayidx = getelementptr inbounds i32* %A, i32 %add
%tmp3 = load i32* %arrayidx, align 4
%tmp4 = load i32* %sum, align 4
%add4 = add nsw i32 %tmp4, %tmp3
store i32 %add4, i32* %sum, align 4
br label %for.cond5
for.cond5: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 100
br i1 %exitcond, label %for.body7, label %for.end
for.body7: ; preds = %for.cond5
br label %for.inc
for.inc: ; preds = %for.body7
%inc = add nsw i32 %k.0, 1
br label %for.cond5
for.end: ; preds = %for.cond5
br label %for.inc8
for.inc8: ; preds = %for.end
%inc9 = add nsw i32 %j.0, 1
br label %for.cond1
for.end10: ; preds = %for.cond1
br label %for.inc11
for.inc11: ; preds = %for.end10
%inc12 = add nsw i32 %i.0, 1
br label %for.cond
for.end13: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,79 @@
; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
;
; CHECK: RAW dependences:
; CHECK: { }
; CHECK: WAR dependences:
; CHECK: { }
; CHECK: WAW dependences:
; CHECK: { }
; CHECK: Reduction dependences:
; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0
; CHECK-DAG: Stmt_for_body3[i0, 99] -> Stmt_for_body3[1 + i0, 0] : i0 <= 98 and i0 >= 0
;
; int f(int * restrict A, int * restrict sum) {
; int i, j, k;
; for (i = 0; i < 100; i++) {
; for (j = 0; j < 100; j++) {
; sum += A[i+j];
; for (k = 0; k< 100; k++) {}
; }
; }
; return sum;
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* noalias %A, i32* noalias %sum) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc11, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
%exitcond2 = icmp ne i32 %i.0, 100
br i1 %exitcond2, label %for.body, label %for.end13
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc8, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ]
%exitcond1 = icmp ne i32 %j.0, 100
br i1 %exitcond1, label %for.body3, label %for.end10
for.body3: ; preds = %for.cond1
%add = add nsw i32 %i.0, %j.0
%arrayidx = getelementptr inbounds i32* %A, i32 %add
%tmp3 = load i32* %arrayidx, align 4
%tmp4 = load i32* %sum, align 4
%add4 = add nsw i32 %tmp4, %tmp3
store i32 %add4, i32* %sum, align 4
br label %for.cond5
for.cond5: ; preds = %for.inc, %for.body3
%k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %k.0, 100
br i1 %exitcond, label %for.body7, label %for.end
for.body7: ; preds = %for.cond5
br label %for.inc
for.inc: ; preds = %for.body7
%inc = add nsw i32 %k.0, 1
br label %for.cond5
for.end: ; preds = %for.cond5
br label %for.inc8
for.inc8: ; preds = %for.end
%inc9 = add nsw i32 %j.0, 1
br label %for.cond1
for.end10: ; preds = %for.cond1
br label %for.inc11
for.inc11: ; preds = %for.end10
%inc12 = add nsw i32 %i.0, 1
br label %for.cond
for.end13: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,73 @@
; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
;
; CHECK: Reduction dependences:
; CHECK: { Stmt_for_inc[i0, i1] -> Stmt_for_inc[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 }
;
; int f(int * __restrict__ A) {
; int i, j, sum = 0;
; for (k = 0; k < 37; k = g(k)) {
; for (i = 0; i < 100; i++) {
; sum *= 2;
; for (j = 0; j < 100; j++) {
; sum += A[i+j];
; }
; }
; }
; return sum;
; }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
declare i64 @g(i64)
define i32 @f(i32* noalias %A) {
entry:
%sum.04.reg2mem = alloca i32
%sum.12.reg2mem = alloca i32
br label %entry.split
entry.split: ; preds = %entry
store i32 0, i32* %sum.04.reg2mem
br label %for.body_outer_split
for.body_outer_split: ; preds = %entry.split, %for.inc5
%indvars.ivK = phi i64 [ 0, %entry.split ], [ %incK, %for.bos2 ]
br label %for.body_outer
for.body_outer: ; preds = %for.body_outer_split
%incK = call i64 @g(i64 %indvars.ivK)
%exitcondK = icmp eq i64 %incK, 100
br i1 %exitcondK, label %for.end7, label %for.body
for.body: ; preds = %for.inc5, %for.body_outer
%indvars.iv23 = phi i64 [ 0, %for.body_outer ], [ %3, %for.inc5 ]
%sum.04.reload = load i32* %sum.04.reg2mem
%mul = shl nsw i32 %sum.04.reload, 1
store i32 %mul, i32* %sum.12.reg2mem
br label %for.inc
for.inc: ; preds = %for.inc, %for.body
%indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
%sum.12.reload = load i32* %sum.12.reg2mem
%0 = add i64 %indvars.iv23, %indvars.iv1
%arrayidx = getelementptr i32* %A, i64 %0
%tmp5 = load i32* %arrayidx, align 4
%add4 = add nsw i32 %tmp5, %sum.12.reload
%1 = add nuw nsw i64 %indvars.iv1, 1
%exitcond1 = icmp eq i64 %1, 100
store i32 %add4, i32* %sum.12.reg2mem
br i1 %exitcond1, label %for.inc5, label %for.inc
for.inc5: ; preds = %for.inc
%2 = load i32* %sum.12.reg2mem
%3 = add nuw nsw i64 %indvars.iv23, 1
%exitcond2 = icmp eq i64 %3, 100
store i32 %2, i32* %sum.04.reg2mem
br i1 %exitcond2, label %for.bos2, label %for.body
for.bos2:
br label %for.body_outer_split
for.end7: ; preds = %for.inc5
%4 = load i32* %sum.04.reg2mem
ret i32 %4
}

View File

@ -0,0 +1,69 @@
; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
;
; CHECK: Reduction dependences:
; CHECK: [N] -> { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 and i1 >= 1024 - N + i0 }
;
; void f(int N, int * restrict sums, int * restrict escape) {
; for (int i = 0; i < 1024; i++) {
; for (int j = 0; j < 1024; j++) {
; sums[i] += 5;
; if (N - i + j < 1024)
; escape[N - i + j] = sums[i];
; }
; }
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, 5
store i32 %add, i32* %arrayidx, align 4
%sub = sub nsw i32 %N, %i.0
%add4 = add nsw i32 %sub, %j.0
%cmp5 = icmp slt i32 %add4, 1024
br i1 %cmp5, label %if.then, label %if.end
if.then: ; preds = %for.body3
%arrayidx6 = getelementptr inbounds i32* %sums, i32 %i.0
%tmp2 = load i32* %arrayidx6, align 4
%sub7 = sub nsw i32 %N, %i.0
%add8 = add nsw i32 %sub7, %j.0
%arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8
store i32 %tmp2, i32* %arrayidx9, align 4
br label %if.end
if.end: ; preds = %if.then, %for.body3
br label %for.inc
for.inc: ; preds = %if.end
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc10
for.inc10: ; preds = %for.end
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,73 @@
; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s
;
; CHECK: RAW dependences:
; CHECK: { }
; CHECK: WAR dependences:
; CHECK: { }
; CHECK: WAW dependences:
; CHECK: { }
; CHECK: Reduction dependences:
; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0
; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, i1] : i0 <= 1022 and i0 >= 0 and i1 <= 1023 and i1 >= 0
;
; void f(int *restrict A, int *restrict B, int *restrict Values) {
; for (int i = 0; i < 1024; i++) {
; for (int j = 0; j < 1024; j++) {
; A[i] += Values[i + j - 1];
; B[j] += Values[i + j + 42];
; }
; }
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %Values) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc11, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end13
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%add = add nsw i32 %i.0, %j.0
%sub = add nsw i32 %add, -1
%arrayidx = getelementptr inbounds i32* %Values, i32 %sub
%tmp = load i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %A, i32 %i.0
%tmp2 = load i32* %arrayidx4, align 4
%add5 = add nsw i32 %tmp2, %tmp
store i32 %add5, i32* %arrayidx4, align 4
%add6 = add nsw i32 %i.0, %j.0
%add7 = add nsw i32 %add6, 42
%arrayidx8 = getelementptr inbounds i32* %Values, i32 %add7
%tmp3 = load i32* %arrayidx8, align 4
%arrayidx9 = getelementptr inbounds i32* %B, i32 %j.0
%tmp4 = load i32* %arrayidx9, align 4
%add10 = add nsw i32 %tmp4, %tmp3
store i32 %add10, i32* %arrayidx9, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc11
for.inc11: ; preds = %for.end
%inc12 = add nsw i32 %i.0, 1
br label %for.cond
for.end13: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,38 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; FIXME: We cannot detect this SCoP yet but as soon as we can we should check
; that the reduction is detected!
;
; CHECK-NOT: Scattering
;
; void f(int *A) {
; for (int i = 0; i < 1024; i++)
; A[i % 2] += i;
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i.0, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%rem = srem i32 %i.0, 2
%arrayidx = getelementptr inbounds i32* %A, i32 %rem
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %i.0
store i32 %add, i32* %arrayidx, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,52 @@
; RUN: opt -basicaa %loadPolly -polly-scops -analyze -polly-disable-multiplicative-reductions < %s | FileCheck %s
;
; CHECK: ReadAccess := [Reduction like: 1]
; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] };
; CHECK: MustWriteAccess := [Reduction like: 1]
; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] };
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] };
; CHECK: MustWriteAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] };
;
; int sum, prod;
;
; void f() {
; int i;
; for (int i = 0; i < 100; i++) {
; sum += i * 3;
; prod *= (i + 3);
; }
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
@sum = common global i32 0, align 4
@prod = common global i32 0, align 4
define void @f() #0 {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i1.0, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%mul = mul nsw i32 %i1.0, 3
%tmp = load i32* @sum, align 4
%add = add nsw i32 %tmp, %mul
store i32 %add, i32* @sum, align 4
%add2 = add nsw i32 %i1.0, 3
%tmp1 = load i32* @prod, align 4
%mul3 = mul nsw i32 %tmp1, %add2
store i32 %mul3, i32* @prod, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i1.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,62 @@
; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
;
; void f(int N, int * restrict sums, int * restrict escape) {
; int i, j;
; for (i = 0; i < 1024; i++) {
; for (j = 0; j < 1024; j++) {
; sums[i] += 5;
; escape[N - i + j] = sums[i];
; }
; }
; }
;
; CHECK: Reduction like: 0
; CHECK: sums
; CHECK: Reduction like: 0
; CHECK: sums
; CHECK: Reduction like: 0
; CHECK: escape
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc7, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end9
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, 5
store i32 %add, i32* %arrayidx, align 4
%sub = sub nsw i32 %N, %i.0
%add5 = add nsw i32 %sub, %j.0
%arrayidx6 = getelementptr inbounds i32* %escape, i32 %add5
store i32 %add, i32* %arrayidx6, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc7
for.inc7: ; preds = %for.end
%inc8 = add nsw i32 %i.0, 1
br label %for.cond
for.end9: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,72 @@
; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
;
; void f(int N, int * restrict sums, int * restrict escape) {
; int i, j;
; for (i = 0; i < 1024; i++) {
; for (j = 0; j < 1024; j++) {
; sums[i] += 5;
; escape[N-j] = escape[i] + sums[i-1];
; }
; }
; }
;
; CHECK: Reduction like: 0
; CHECK: sums
; CHECK: Reduction like: 0
; CHECK: sums
; CHECK: Reduction like: 0
; CHECK: escape
; CHECK: Reduction like: 0
; CHECK: sums
; CHECK: Reduction like: 0
; CHECK: escape
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc10, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end12
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, 5
store i32 %add, i32* %arrayidx, align 4
%arrayidx4 = getelementptr inbounds i32* %escape, i32 %i.0
%tmp2 = load i32* %arrayidx4, align 4
%sub = add nsw i32 %i.0, -1
%arrayidx5 = getelementptr inbounds i32* %sums, i32 %sub
%tmp3 = load i32* %arrayidx5, align 4
%add6 = add nsw i32 %tmp2, %tmp3
%sub7 = sub nsw i32 %N, %i.0
%add8 = add nsw i32 %sub7, %j.0
%arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8
store i32 %add6, i32* %arrayidx9, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc10
for.inc10: ; preds = %for.end
%inc11 = add nsw i32 %i.0, 1
br label %for.cond
for.end12: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,49 @@
; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
;
; int f() {
; int i, sum = 0, sth = 0;
; for (i = 0; i < 1024; i++) {
; sum += 5;
; sth = sth + sth * sth + sth;
; sum *= 5;
; }
; return sum + sth;
; }
;
; CHECK-NOT: Reduction like: 1
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define i32 @f() {
entry:
%sum.0 = alloca i32
%sth.0 = alloca i32
br label %entry.split
entry.split: ; preds = %entry
store i32 0, i32* %sum.0
store i32 0, i32* %sth.0
br label %for.cond
for.cond: ; preds = %for.cond, %entry.split
%i.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ]
%sth.0.reload = load i32* %sth.0
%sum.0.reload = load i32* %sum.0
%exitcond = icmp ne i32 %i.0, 1024
%mul = mul nsw i32 %sth.0.reload, %sth.0.reload
%add1 = add nsw i32 %sth.0.reload, %mul
%tmp = mul i32 %sum.0.reload, 5
store i32 %tmp, i32* %sum.0
%sum.1.reload = load i32* %sum.0
%mul3 = add i32 %sum.1.reload, 25
%add2 = add nsw i32 %add1, %sth.0.reload
%inc = add nsw i32 %i.0, 1
store i32 %mul3, i32* %sum.0
store i32 %add2, i32* %sth.0
br i1 %exitcond, label %for.cond, label %for.end
for.end: ; preds = %for.cond
%sum.0.reload.2 = load i32* %sum.0
%sth.0.reload.2 = load i32* %sth.0
%add4 = add nsw i32 %sum.0.reload.2, %sth.0.reload.2
ret i32 %add4
}

View File

@ -0,0 +1,58 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; void f(int *sums) {
; int i, j;
; for (i = 0; i < 1024; i++) {
; for (j = 0; j < 1024; j++) {
; sums[i] += 5;
; sums[i+10] *= 5;
; }
; }
; }
;
; CHECK-NOT: Reduction like: 1
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* %sums) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end8
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, 5
store i32 %add, i32* %arrayidx, align 4
%add4 = add nsw i32 %i.0, 10
%arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4
%tmp2 = load i32* %arrayidx5, align 4
%mul = mul nsw i32 %tmp2, 5
store i32 %mul, i32* %arrayidx5, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc6
for.inc6: ; preds = %for.end
%inc7 = add nsw i32 %i.0, 1
br label %for.cond
for.end8: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,78 @@
; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Stmt_for_body
; CHECK: Reduction like: 1
; CHECK: MemRef_sum
; CHECK: Reduction like: 1
; CHECK: MemRef_sum
; CHECK: Stmt_for_body3
; CHECK: Reduction like: 0
; CHECK: MemRef_A
; CHECK: Reduction like: 1
; CHECK: MemRef_sum
; CHECK: Reduction like: 1
; CHECK: MemRef_sum
; CHECK: Stmt_for_end
; CHECK: Reduction like: 1
; CHECK: MemRef_sum
; CHECK: Reduction like: 1
; CHECK: MemRef_sum
;
; void f(int *restrict A, int *restrict sum) {
; int i, j;
; for (i = 0; i < 100; i++) {
; *sum *= 7;
; for (j = 0; j < 100; j++) {
; *sum += A[i + j];
; }
; *sum *= 5;
; }
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* noalias %A, i32* noalias %sum) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
%exitcond1 = icmp ne i32 %i.0, 100
br i1 %exitcond1, label %for.body, label %for.end8
for.body: ; preds = %for.cond
%tmp = load i32* %sum, align 4
%mul = mul nsw i32 %tmp, 7
store i32 %mul, i32* %sum, align 4
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 100
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%add = add nsw i32 %i.0, %j.0
%arrayidx = getelementptr inbounds i32* %A, i32 %add
%tmp2 = load i32* %arrayidx, align 4
%tmp3 = load i32* %sum, align 4
%add4 = add nsw i32 %tmp3, %tmp2
store i32 %add4, i32* %sum, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
%tmp4 = load i32* %sum, align 4
%mul5 = mul nsw i32 %tmp4, 5
store i32 %mul5, i32* %sum, align 4
br label %for.inc6
for.inc6: ; preds = %for.end
%inc7 = add nsw i32 %i.0, 1
br label %for.cond
for.end8: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,72 @@
; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Stmt_for_body
; CHECK: Reduction like: 0
; CHECK: MemRef_sum_04
; CHECK: Reduction like: 0
; CHECK: MemRef_sum_12
; CHECK: Stmt_for_inc
; CHECK: Reduction like: 1
; CHECK: MemRef_sum_12
; CHECK: Reduction like: 0
; CHECK: MemRef_A
; CHECK: Reduction like: 1
; CHECK: MemRef_sum_12
; CHECK: Stmt_for_inc5
; CHECK: Reduction like: 0
; CHECK: MemRef_sum_12
; CHECK: Reduction like: 0
; CHECK: MemRef_sum_04
;
; int f(int * __restrict__ A) {
; int i, j, sum = 1;
; for (i = 0; i < 100; i++) {
; sum *= 7;
; for (j = 0; j < 100; j++) {
; sum += A[i+j];
; }
; }
; return sum;
; }
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define i32 @f(i32* noalias %A) {
entry:
%sum.04.reg2mem = alloca i32
%sum.12.reg2mem = alloca i32
br label %entry.split
entry.split: ; preds = %entry
store i32 0, i32* %sum.04.reg2mem
br label %for.body
for.body: ; preds = %for.inc5, %entry.split
%indvars.iv23 = phi i64 [ 0, %entry.split ], [ %3, %for.inc5 ]
%sum.04.reload = load i32* %sum.04.reg2mem
%mul = mul nsw i32 %sum.04.reload, 7
store i32 %mul, i32* %sum.12.reg2mem
br label %for.inc
for.inc: ; preds = %for.inc, %for.body
%indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
%sum.12.reload = load i32* %sum.12.reg2mem
%0 = add i64 %indvars.iv23, %indvars.iv1
%arrayidx = getelementptr i32* %A, i64 %0
%tmp5 = load i32* %arrayidx, align 4
%add4 = add nsw i32 %tmp5, %sum.12.reload
%1 = add nuw nsw i64 %indvars.iv1, 1
%exitcond1 = icmp eq i64 %1, 100
store i32 %add4, i32* %sum.12.reg2mem
br i1 %exitcond1, label %for.inc5, label %for.inc
for.inc5: ; preds = %for.inc
%2 = load i32* %sum.12.reg2mem
%3 = add nuw nsw i64 %indvars.iv23, 1
%exitcond2 = icmp eq i64 %3, 100
store i32 %2, i32* %sum.04.reg2mem
br i1 %exitcond2, label %for.end7, label %for.body
for.end7: ; preds = %for.inc5
%4 = load i32* %sum.04.reg2mem
ret i32 %4
}

View File

@ -0,0 +1,98 @@
; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_A[1 + i0] };
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] };
; CHECK: MustWriteAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_first[0] };
; CHECK: ReadAccess := [Reduction like: 1]
; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] };
; CHECK: MustWriteAccess := [Reduction like: 1]
; CHECK: { Stmt_for_body[i0] -> MemRef_sum[0] };
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_A[-1 + i0] };
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_A[i0] };
; CHECK: MustWriteAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_middle[0] };
; CHECK: ReadAccess := [Reduction like: 1]
; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] };
; CHECK: MustWriteAccess := [Reduction like: 1]
; CHECK: { Stmt_for_body[i0] -> MemRef_prod[0] };
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_A[-1 + i0] };
; CHECK: ReadAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_A[1 + i0] };
; CHECK: MustWriteAccess := [Reduction like: 0]
; CHECK: { Stmt_for_body[i0] -> MemRef_last[0] };
;
; int first, sum, middle, prod, last;
;
; void f(int * restrict A) {
; int i;
; for (int i = 0; i < 100; i++) {
; first = A[i+1] + A[i];
; sum += i * 3;
; middle = A[i-1] + A[i];
; prod *= (i + 3);
; last = A[i-1] + A[i+1];
; }
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
@first = common global i32 0, align 4
@sum = common global i32 0, align 4
@middle = common global i32 0, align 4
@prod = common global i32 0, align 4
@last = common global i32 0, align 4
define void @f(i32* noalias %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i1.0, 100
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%add = add nsw i32 %i1.0, 1
%arrayidx = getelementptr inbounds i32* %A, i32 %add
%tmp = load i32* %arrayidx, align 4
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i1.0
%tmp1 = load i32* %arrayidx2, align 4
%add3 = add nsw i32 %tmp, %tmp1
store i32 %add3, i32* @first, align 4
%mul = mul nsw i32 %i1.0, 3
%tmp2 = load i32* @sum, align 4
%add4 = add nsw i32 %tmp2, %mul
store i32 %add4, i32* @sum, align 4
%sub = add nsw i32 %i1.0, -1
%arrayidx5 = getelementptr inbounds i32* %A, i32 %sub
%tmp3 = load i32* %arrayidx5, align 4
%arrayidx6 = getelementptr inbounds i32* %A, i32 %i1.0
%tmp4 = load i32* %arrayidx6, align 4
%add7 = add nsw i32 %tmp3, %tmp4
store i32 %add7, i32* @middle, align 4
%add8 = add nsw i32 %i1.0, 3
%tmp5 = load i32* @prod, align 4
%mul9 = mul nsw i32 %tmp5, %add8
store i32 %mul9, i32* @prod, align 4
%sub10 = add nsw i32 %i1.0, -1
%arrayidx11 = getelementptr inbounds i32* %A, i32 %sub10
%tmp6 = load i32* %arrayidx11, align 4
%add12 = add nsw i32 %i1.0, 1
%arrayidx13 = getelementptr inbounds i32* %A, i32 %add12
%tmp7 = load i32* %arrayidx13, align 4
%add14 = add nsw i32 %tmp6, %tmp7
store i32 %add14, i32* @last, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i1.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}

View File

@ -0,0 +1,60 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Reduction like: 1
; CHECK: Reduction like: 1
; CHECK: Reduction like: 1
; CHECK: Reduction like: 1
;
; void f(int *sums) {
; for (int i = 0; i < 1024; i++) {
; for (int j = 0; j < 1024; j++) {
; sums[i] += 5;
; sums[i+1024] *= 5;
; }
; }
; }
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f(i32* %sums) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc6, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
%exitcond1 = icmp ne i32 %i.0, 1024
br i1 %exitcond1, label %for.body, label %for.end8
for.body: ; preds = %for.cond
br label %for.cond1
for.cond1: ; preds = %for.inc, %for.body
%j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %j.0, 1024
br i1 %exitcond, label %for.body3, label %for.end
for.body3: ; preds = %for.cond1
%arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, 5
store i32 %add, i32* %arrayidx, align 4
%add4 = add nsw i32 %i.0, 1024
%arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4
%tmp2 = load i32* %arrayidx5, align 4
%mul = mul nsw i32 %tmp2, 5
store i32 %mul, i32* %arrayidx5, align 4
br label %for.inc
for.inc: ; preds = %for.body3
%inc = add nsw i32 %j.0, 1
br label %for.cond1
for.end: ; preds = %for.cond1
br label %for.inc6
for.inc6: ; preds = %for.end
%inc7 = add nsw i32 %i.0, 1
br label %for.cond
for.end8: ; preds = %for.cond
ret void
}

View File

@ -1,6 +1,6 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK: Reduction like: 0
; CHECK: Reduction like: 1
;
; void f(int *sum) {
; for (int i = 0; i < 100; i++)

View File

@ -0,0 +1,66 @@
; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
;
; CHECK-NOT: Reduction like: 1
;
; Check that we do not mark these accesses as reduction like.
; We do this for the case the loads are modelt with the same LLVM-IR value and
; for the case there are different LLVM-IR values.
;
; void f(int *A) {
; for (int i = 0; i < 1024; i++)
; A[i] = A[i] + A[i];
; }
;
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
define void @f_one_load_case(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i.0, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%add = add nsw i32 %tmp, %tmp
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
store i32 %add, i32* %arrayidx2, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}
define void @f_two_loads_case(i32* %A) {
entry:
br label %for.cond
for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%exitcond = icmp ne i32 %i.0, 1024
br i1 %exitcond, label %for.body, label %for.end
for.body: ; preds = %for.cond
%arrayidx = getelementptr inbounds i32* %A, i32 %i.0
%tmp = load i32* %arrayidx, align 4
%arrayidxCopy = getelementptr inbounds i32* %A, i32 %i.0
%tmpCopy = load i32* %arrayidxCopy, align 4
%add = add nsw i32 %tmp, %tmpCopy
%arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
store i32 %add, i32* %arrayidx2, align 4
br label %for.inc
for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond
for.end: ; preds = %for.cond
ret void
}