Allow multiple reductions per statement

Iterate over all store memory accesses and check for valid binary reduction candidate loads by following the operands of the stored value. For each candidate pair we check if they have the same base address and there are no other accesses which may overlap with them. This ensures that no intermediate value can escape into other memory locations or is overwritten at some point. + 17 test cases for reduction detection and reduction dependency modeling llvm-svn: 211957
2025-01-16 05:01:56 +00:00 · 2014-06-27 20:31:28 +00:00 · 2014-06-27 20:31:28 +00:00 · e58a012094
commit e58a012094
parent e1c1138a38
22 changed files with 1305 additions and 24 deletions
--- a/polly/include/polly/ScopInfo.h
+++ b/polly/include/polly/ScopInfo.h
@ -324,7 +324,14 @@ class ScopStmt {
  __isl_give isl_set *buildDomain(TempScop &tempScop, const Region &CurRegion);
  void buildScattering(SmallVectorImpl<unsigned> &Scatter);
  void buildAccesses(TempScop &tempScop, const Region &CurRegion);
-  void checkForReduction();
+
+  /// @brief Detect and mark reductions in the ScopStmt
+  void checkForReductions();
+
+  /// @brief Collect loads which might form a reduction chain with @p StoreMA
+  void
+  collectCandiateReductionLoads(MemoryAccess *StoreMA,
+                                llvm::SmallVectorImpl<MemoryAccess *> &Loads);
  //@}

  /// Create the ScopStmt from a BasicBlock.
--- a/polly/lib/Analysis/Dependences.cpp
+++ b/polly/lib/Analysis/Dependences.cpp
@ -308,7 +308,6 @@ void Dependences::calculateDependences(Scop &S) {
      isl_map *Identity =
          isl_map_from_domain_and_range(isl_set_copy(AccDomW), AccDomW);
      RED = isl_union_map_add_map(RED, Identity);
-      break;
    }
  }

--- a/polly/lib/Analysis/ScopInfo.cpp
+++ b/polly/lib/Analysis/ScopInfo.cpp
@ -706,45 +706,125 @@ ScopStmt::ScopStmt(Scop &parent, TempScop &tempScop, const Region &CurRegion,
  Domain = buildDomain(tempScop, CurRegion);
  buildScattering(Scatter);
  buildAccesses(tempScop, CurRegion);
-  checkForReduction();
+  checkForReductions();
 }

-void ScopStmt::checkForReduction() {
-  // Skip statements with more than one binary reduction
-  if (MemAccs.size() != 2)
-    return;
-
-  // Skip if there is not exactly one load and one store
-  unsigned LoadIdx = MemAccs[0]->isRead() ? 0 : 1;
-  auto *Load = dyn_cast<LoadInst>(MemAccs[LoadIdx]->getAccessInstruction());
-  auto *Store =
-      dyn_cast<StoreInst>(MemAccs[1 - LoadIdx]->getAccessInstruction());
-  if (!Load || !Store ||
-      Load->getPointerOperand() != Store->getPointerOperand())
+/// @brief Collect loads which might form a reduction chain with @p StoreMA
+///
+/// Check if the stored value for @p StoreMA is a binary operator with one or
+/// two loads as operands. If the binary operand is commutative & associative,
+/// used only once (by @p StoreMA) and its load operands are also used only
+/// once, we have found a possible reduction chain. It starts at an operand
+/// load and includes the binary operator and @p StoreMA.
+///
+/// Note: We allow only one use to ensure the load and binary operator cannot
+///       escape this block or into any other store except @p StoreMA.
+void ScopStmt::collectCandiateReductionLoads(
+    MemoryAccess *StoreMA, SmallVectorImpl<MemoryAccess *> &Loads) {
+  auto *Store = dyn_cast<StoreInst>(StoreMA->getAccessInstruction());
+  if (!Store)
    return;

  // Skip if there is not one binary operator between the load and the store
  auto *BinOp = dyn_cast<BinaryOperator>(Store->getValueOperand());
-  if (!BinOp || (BinOp->getOperand(0) != Load && BinOp->getOperand(1) != Load))
+  if (!BinOp)
+    return;
+
+  // Skip if the binary operators has multiple uses
+  if (BinOp->getNumUses() != 1)
    return;

  // Skip if the opcode of the binary operator is not commutative/associative
  if (!BinOp->isCommutative() || !BinOp->isAssociative())
    return;

-  // Skip if the load has multiple uses
-  if (Load->getNumUses() != 1)
-    return;
-
  // Skip if it is a multiplicative reduction and we disabled them
  if (DisableMultiplicativeReductions &&
      (BinOp->getOpcode() == Instruction::Mul ||
       BinOp->getOpcode() == Instruction::FMul))
    return;

-  // Valid reduction like access
-  MemAccs[0]->markReductionLike();
-  MemAccs[1]->markReductionLike();
+  // Check the binary operator operands for a candidate load
+  auto *PossibleLoad0 = dyn_cast<LoadInst>(BinOp->getOperand(0));
+  auto *PossibleLoad1 = dyn_cast<LoadInst>(BinOp->getOperand(1));
+  if (!PossibleLoad0 && !PossibleLoad1)
+    return;
+
+  // A load is only a candidate if it cannot escape (thus has only this use)
+  if (PossibleLoad0 && PossibleLoad0->getNumUses() == 1)
+    Loads.push_back(lookupAccessFor(PossibleLoad0));
+  if (PossibleLoad1 && PossibleLoad1->getNumUses() == 1)
+    Loads.push_back(lookupAccessFor(PossibleLoad1));
+}
+
+/// @brief Check for reductions in this ScopStmt
+///
+/// Iterate over all store memory accesses and check for valid binary reduction
+/// like chains. For all candidates we check if they have the same base address
+/// and there are no other accesses which overlap with them. The base address
+/// check rules out impossible reductions candidates early. The overlap check,
+/// together with the "only one user" check in collectCandiateReductionLoads,
+/// guarantees that none of the intermediate results will escape during
+/// execution of the loop nest. We basically check here that no other memory
+/// access can access the same memory as the potential reduction.
+void ScopStmt::checkForReductions() {
+  SmallVector<MemoryAccess *, 2> Loads;
+  SmallVector<std::pair<MemoryAccess *, MemoryAccess *>, 4> Candidates;
+
+  // First collect candidate load-store reduction chains by iterating over all
+  // stores and collecting possible reduction loads.
+  for (MemoryAccess *StoreMA : MemAccs) {
+    if (StoreMA->isRead())
+      continue;
+
+    Loads.clear();
+    collectCandiateReductionLoads(StoreMA, Loads);
+    for (MemoryAccess *LoadMA : Loads)
+      Candidates.push_back(std::make_pair(LoadMA, StoreMA));
+  }
+
+  // Then check each possible candidate pair.
+  for (const auto &CandidatePair : Candidates) {
+    bool Valid = true;
+    isl_map *LoadAccs = CandidatePair.first->getAccessRelation();
+    isl_map *StoreAccs = CandidatePair.second->getAccessRelation();
+
+    // Skip those with obviously unequal base addresses.
+    if (!isl_map_has_equal_space(LoadAccs, StoreAccs)) {
+      isl_map_free(LoadAccs);
+      isl_map_free(StoreAccs);
+      continue;
+    }
+
+    // And check if the remaining for overlap with other memory accesses.
+    isl_map *AllAccsRel = isl_map_union(LoadAccs, StoreAccs);
+    AllAccsRel = isl_map_intersect_domain(AllAccsRel, getDomain());
+    isl_set *AllAccs = isl_map_range(AllAccsRel);
+
+    for (MemoryAccess *MA : MemAccs) {
+      if (MA == CandidatePair.first || MA == CandidatePair.second)
+        continue;
+
+      isl_map *AccRel =
+          isl_map_intersect_domain(MA->getAccessRelation(), getDomain());
+      isl_set *Accs = isl_map_range(AccRel);
+
+      if (isl_set_has_equal_space(AllAccs, Accs) || isl_set_free(Accs)) {
+        isl_set *OverlapAccs = isl_set_intersect(Accs, isl_set_copy(AllAccs));
+        Valid = Valid && isl_set_is_empty(OverlapAccs);
+        isl_set_free(OverlapAccs);
+      }
+    }
+
+    isl_set_free(AllAccs);
+    if (!Valid)
+      continue;
+
+    // If no overlapping access was found we mark the load and store as
+    // reduction like.
+    CandidatePair.first->markReductionLike();
+    CandidatePair.second->markReductionLike();
+  }
 }

 std::string ScopStmt::getDomainStr() const { return stringFromIslObj(Domain); }
--- a/polly/test/Dependences/reduction_complex_location.ll
+++ b/polly/test/Dependences/reduction_complex_location.ll
@ -0,0 +1,59 @@
+; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK:   {  }
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK:   {  }
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_body3[i0, i1] -> Stmt_for_body3[2 + i0, -1 + i1] : i0 <= 97 and i0 >= 0 and i1 <= 99 and i1 >= 1 }
+;
+; void f(int *sum) {
+;   for (int i = 0; i < 100; i++)
+;     for (int j = 0; j < 100; j++)
+;       sum[i+j*2] += j * i;
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 100
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 100
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %mul = mul nsw i32 %j.0, %i.0
+  %mul4 = shl nsw i32 %j.0, 1
+  %add = add nsw i32 %i.0, %mul4
+  %arrayidx = getelementptr inbounds i32* %sum, i32 %add
+  %tmp = load i32* %arrayidx, align 4
+  %add5 = add nsw i32 %tmp, %mul
+  store i32 %add5, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
--- a/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll
+++ b/polly/test/Dependences/reduction_mixed_reduction_and_non_reduction_dependences.ll
@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -polly-dependences -analyze < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK-DAG: Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, o1] : i0 <= 1022 and i0 >= 0 and o1 <= 511 and o1 >= 1
+; CHECK-DAG: Stmt_for_body3[i0, 0] -> Stmt_for_body3[i0, o1] : i0 <= 1023 and i0 >= 0 and o1 <= 511 and o1 >= 1
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, -1 + i1] : i0 <= 1022 and i0 >= 0 and i1 <= 511 and i1 >= 2
+; CHECK-DAG: Stmt_for_body3[i0, 2] -> Stmt_for_body3[2 + i0, 0] : i0 <= 1021 and i0 >= 0
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_body3[i0, 1] -> Stmt_for_body3[1 + i0, 0] : i0 >= 0 and i0 <= 1022 }
+;
+; void f(int *sum) {
+;   for (int i = 0; i < 1024; i++)
+;     for (int j = 0; j < 512; j++)
+;       sum[i + j] = sum[i] + 3;
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 512
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sum, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 3
+  %add4 = add nsw i32 %i.0, %j.0
+  %arrayidx5 = getelementptr inbounds i32* %sum, i32 %add4
+  store i32 %add, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
--- a/polly/test/Dependences/reduction_multiple_loops_array_sum.ll
+++ b/polly/test/Dependences/reduction_multiple_loops_array_sum.ll
@ -0,0 +1,77 @@
+; RUN: opt -basicaa %loadPolly -polly-dependences -analyze < %s | FileCheck %s
+;
+; Verify that only the inner reduction like accesses cause reduction dependences
+;
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 }
+;
+; void f(int * restrict A, int * restrict sum) {
+;   int i, j, k;
+;   for (i = 0; i < 100; i++) {
+;     *sum *= 7;
+;     for (j = 0; j < 100; j++) {
+;       *sum += A[i+j];
+;       for (k = 0; k< 100; k++) {}
+;     }
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  %exitcond2 = icmp ne i32 %i.0, 100
+  br i1 %exitcond2, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i32* %sum, align 4
+  %mul = mul nsw i32 %tmp, 7
+  store i32 %mul, i32* %sum, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc8, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ]
+  %exitcond1 = icmp ne i32 %j.0, 100
+  br i1 %exitcond1, label %for.body3, label %for.end10
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp3 = load i32* %arrayidx, align 4
+  %tmp4 = load i32* %sum, align 4
+  %add4 = add nsw i32 %tmp4, %tmp3
+  store i32 %add4, i32* %sum, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc, %for.body3
+  %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %k.0, 100
+  br i1 %exitcond, label %for.body7, label %for.end
+
+for.body7:                                        ; preds = %for.cond5
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body7
+  %inc = add nsw i32 %k.0, 1
+  br label %for.cond5
+
+for.end:                                          ; preds = %for.cond5
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.end
+  %inc9 = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end10:                                        ; preds = %for.cond1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end10
+  %inc12 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
--- a/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll
+++ b/polly/test/Dependences/reduction_multiple_loops_array_sum_2.ll
@ -0,0 +1,79 @@
+; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK:   {  }
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK:   {  }
+; CHECK: Reduction dependences:
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0
+; CHECK-DAG: Stmt_for_body3[i0, 99] -> Stmt_for_body3[1 + i0, 0] : i0 <= 98 and i0 >= 0
+;
+; int f(int * restrict A, int * restrict sum) {
+;   int i, j, k;
+;   for (i = 0; i < 100; i++) {
+;     for (j = 0; j < 100; j++) {
+;       sum += A[i+j];
+;       for (k = 0; k< 100; k++) {}
+;     }
+;   }
+;   return sum;
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  %exitcond2 = icmp ne i32 %i.0, 100
+  br i1 %exitcond2, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc8, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc9, %for.inc8 ]
+  %exitcond1 = icmp ne i32 %j.0, 100
+  br i1 %exitcond1, label %for.body3, label %for.end10
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp3 = load i32* %arrayidx, align 4
+  %tmp4 = load i32* %sum, align 4
+  %add4 = add nsw i32 %tmp4, %tmp3
+  store i32 %add4, i32* %sum, align 4
+  br label %for.cond5
+
+for.cond5:                                        ; preds = %for.inc, %for.body3
+  %k.0 = phi i32 [ 0, %for.body3 ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %k.0, 100
+  br i1 %exitcond, label %for.body7, label %for.end
+
+for.body7:                                        ; preds = %for.cond5
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body7
+  %inc = add nsw i32 %k.0, 1
+  br label %for.cond5
+
+for.end:                                          ; preds = %for.cond5
+  br label %for.inc8
+
+for.inc8:                                         ; preds = %for.end
+  %inc9 = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end10:                                        ; preds = %for.cond1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end10
+  %inc12 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
--- a/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll
+++ b/polly/test/Dependences/reduction_multiple_loops_array_sum_3.ll
@ -0,0 +1,73 @@
+; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
+;
+; CHECK: Reduction dependences:
+; CHECK:   { Stmt_for_inc[i0, i1] -> Stmt_for_inc[i0, 1 + i1] : i0 <= 99 and i0 >= 0 and i1 <= 98 and i1 >= 0 }
+;
+; int f(int * __restrict__ A) {
+;   int i, j, sum = 0;
+;   for (k = 0; k < 37; k = g(k)) {
+;     for (i = 0; i < 100; i++) {
+;       sum *= 2;
+;       for (j = 0; j < 100; j++) {
+;         sum += A[i+j];
+;       }
+;     }
+;   }
+;   return sum;
+; }
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+declare i64 @g(i64)
+
+define i32 @f(i32* noalias %A) {
+entry:
+  %sum.04.reg2mem = alloca i32
+  %sum.12.reg2mem = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  store i32 0, i32* %sum.04.reg2mem
+  br label %for.body_outer_split
+
+for.body_outer_split:                             ; preds = %entry.split, %for.inc5
+  %indvars.ivK = phi i64 [ 0, %entry.split ], [ %incK, %for.bos2 ]
+  br label %for.body_outer
+
+for.body_outer:                                   ; preds = %for.body_outer_split
+  %incK = call i64 @g(i64 %indvars.ivK)
+  %exitcondK = icmp eq i64 %incK, 100
+  br i1 %exitcondK, label %for.end7, label %for.body
+
+for.body:                                         ; preds = %for.inc5, %for.body_outer
+  %indvars.iv23 = phi i64 [ 0, %for.body_outer ], [ %3, %for.inc5 ]
+  %sum.04.reload = load i32* %sum.04.reg2mem
+  %mul = shl nsw i32 %sum.04.reload, 1
+  store i32 %mul, i32* %sum.12.reg2mem
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
+  %sum.12.reload = load i32* %sum.12.reg2mem
+  %0 = add i64 %indvars.iv23, %indvars.iv1
+  %arrayidx = getelementptr i32* %A, i64 %0
+  %tmp5 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, %sum.12.reload
+  %1 = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond1 = icmp eq i64 %1, 100
+  store i32 %add4, i32* %sum.12.reg2mem
+  br i1 %exitcond1, label %for.inc5, label %for.inc
+
+for.inc5:                                         ; preds = %for.inc
+  %2 = load i32* %sum.12.reg2mem
+  %3 = add nuw nsw i64 %indvars.iv23, 1
+  %exitcond2 = icmp eq i64 %3, 100
+  store i32 %2, i32* %sum.04.reg2mem
+  br i1 %exitcond2, label %for.bos2, label %for.body
+
+for.bos2:
+  br label %for.body_outer_split
+
+for.end7:                                         ; preds = %for.inc5
+  %4 = load i32* %sum.04.reg2mem
+  ret i32 %4
+}
--- a/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll
+++ b/polly/test/Dependences/reduction_partially_escaping_intermediate_in_other_stmt.ll
@ -0,0 +1,69 @@
+; RUN: opt %loadPolly -polly-dependences -analyze -basicaa < %s | FileCheck %s
+;
+; CHECK: Reduction dependences:
+; CHECK:   [N] -> { Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0 and i1 >= 1024 - N + i0 }
+;
+; void f(int N, int * restrict sums, int * restrict escape) {
+;   for (int i = 0; i < 1024; i++) {
+;     for (int j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       if (N - i + j < 1024)
+;         escape[N - i + j] = sums[i];
+;     }
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc10, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end12
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %N, %i.0
+  %add4 = add nsw i32 %sub, %j.0
+  %cmp5 = icmp slt i32 %add4, 1024
+  br i1 %cmp5, label %if.then, label %if.end
+
+if.then:                                          ; preds = %for.body3
+  %arrayidx6 = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp2 = load i32* %arrayidx6, align 4
+  %sub7 = sub nsw i32 %N, %i.0
+  %add8 = add nsw i32 %sub7, %j.0
+  %arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8
+  store i32 %tmp2, i32* %arrayidx9, align 4
+  br label %if.end
+
+if.end:                                           ; preds = %if.then, %for.body3
+  br label %for.inc
+
+for.inc:                                          ; preds = %if.end
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.end
+  %inc11 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end12:                                        ; preds = %for.cond
+  ret void
+}
--- a/polly/test/Dependences/reduction_two_reductions_different_rloops.ll
+++ b/polly/test/Dependences/reduction_two_reductions_different_rloops.ll
@ -0,0 +1,73 @@
+; RUN: opt %loadPolly -basicaa -polly-dependences -analyze < %s | FileCheck %s
+;
+; CHECK: RAW dependences:
+; CHECK:   {  }
+; CHECK: WAR dependences:
+; CHECK:   {  }
+; CHECK: WAW dependences:
+; CHECK:   {  }
+; CHECK: Reduction dependences:
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[i0, 1 + i1] : i0 <= 1023 and i0 >= 0 and i1 <= 1022 and i1 >= 0
+; CHECK-DAG: Stmt_for_body3[i0, i1] -> Stmt_for_body3[1 + i0, i1] : i0 <= 1022 and i0 >= 0 and i1 <= 1023 and i1 >= 0
+;
+; void f(int *restrict A, int *restrict B, int *restrict Values) {
+;   for (int i = 0; i < 1024; i++) {
+;     for (int j = 0; j < 1024; j++) {
+;       A[i] += Values[i + j - 1];
+;       B[j] += Values[i + j + 42];
+;     }
+;   }
+; }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %Values)  {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc11, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc12, %for.inc11 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end13
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %sub = add nsw i32 %add, -1
+  %arrayidx = getelementptr inbounds i32* %Values, i32 %sub
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %A, i32 %i.0
+  %tmp2 = load i32* %arrayidx4, align 4
+  %add5 = add nsw i32 %tmp2, %tmp
+  store i32 %add5, i32* %arrayidx4, align 4
+  %add6 = add nsw i32 %i.0, %j.0
+  %add7 = add nsw i32 %add6, 42
+  %arrayidx8 = getelementptr inbounds i32* %Values, i32 %add7
+  %tmp3 = load i32* %arrayidx8, align 4
+  %arrayidx9 = getelementptr inbounds i32* %B, i32 %j.0
+  %tmp4 = load i32* %arrayidx9, align 4
+  %add10 = add nsw i32 %tmp4, %tmp3
+  store i32 %add10, i32* %arrayidx9, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc11
+
+for.inc11:                                        ; preds = %for.end
+  %inc12 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end13:                                        ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_alternating_base.ll
+++ b/polly/test/ScopInfo/reduction_alternating_base.ll
@ -0,0 +1,38 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; FIXME: We cannot detect this SCoP yet but as soon as we can we should check
+;        that the reduction is detected!
+;
+; CHECK-NOT: Scattering
+;
+;    void f(int *A) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i % 2] += i;
+;    }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i.0, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %rem = srem i32 %i.0, 2
+  %arrayidx = getelementptr inbounds i32* %A, i32 %rem
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, %i.0
+  store i32 %add, i32* %arrayidx, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_disabled_multiplicative.ll
+++ b/polly/test/ScopInfo/reduction_disabled_multiplicative.ll
@ -0,0 +1,52 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze -polly-disable-multiplicative-reductions < %s | FileCheck %s
+;
+; CHECK: ReadAccess :=       [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+;
+; int sum, prod;
+; 
+; void f() {
+;   int i;
+;   for (int i = 0; i < 100; i++) {
+;     sum += i * 3;
+;     prod *= (i + 3);
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@sum = common global i32 0, align 4
+@prod = common global i32 0, align 4
+
+define void @f() #0 {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i1.0, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %mul = mul nsw i32 %i1.0, 3
+  %tmp = load i32* @sum, align 4
+  %add = add nsw i32 %tmp, %mul
+  store i32 %add, i32* @sum, align 4
+  %add2 = add nsw i32 %i1.0, 3
+  %tmp1 = load i32* @prod, align 4
+  %mul3 = mul nsw i32 %tmp1, %add2
+  store i32 %mul3, i32* @prod, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i1.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_escaping_intermediate.ll
+++ b/polly/test/ScopInfo/reduction_escaping_intermediate.ll
@ -0,0 +1,62 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(int N, int * restrict sums, int * restrict escape) {
+;   int i, j;
+;   for (i = 0; i < 1024; i++) {
+;     for (j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       escape[N - i + j] = sums[i];
+;     }
+;   }
+; }
+;
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: escape
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc7, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.inc7 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end9
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %sub = sub nsw i32 %N, %i.0
+  %add5 = add nsw i32 %sub, %j.0
+  %arrayidx6 = getelementptr inbounds i32* %escape, i32 %add5
+  store i32 %add, i32* %arrayidx6, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc7
+
+for.inc7:                                         ; preds = %for.end
+  %inc8 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end9:                                         ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll
+++ b/polly/test/ScopInfo/reduction_escaping_intermediate_2.ll
@ -0,0 +1,72 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(int N, int * restrict sums, int * restrict escape) {
+;   int i, j;
+;   for (i = 0; i < 1024; i++) {
+;     for (j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       escape[N-j] = escape[i] + sums[i-1];
+;     }
+;   }
+; }
+;
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: escape
+; CHECK: Reduction like: 0
+; CHECK: sums
+; CHECK: Reduction like: 0
+; CHECK: escape
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32 %N, i32* noalias %sums, i32* noalias %escape) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc10, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc11, %for.inc10 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end12
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %arrayidx4 = getelementptr inbounds i32* %escape, i32 %i.0
+  %tmp2 = load i32* %arrayidx4, align 4
+  %sub = add nsw i32 %i.0, -1
+  %arrayidx5 = getelementptr inbounds i32* %sums, i32 %sub
+  %tmp3 = load i32* %arrayidx5, align 4
+  %add6 = add nsw i32 %tmp2, %tmp3
+  %sub7 = sub nsw i32 %N, %i.0
+  %add8 = add nsw i32 %sub7, %j.0
+  %arrayidx9 = getelementptr inbounds i32* %escape, i32 %add8
+  store i32 %add6, i32* %arrayidx9, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc10
+
+for.inc10:                                        ; preds = %for.end
+  %inc11 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end12:                                        ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_invalid_different_operators.ll
+++ b/polly/test/ScopInfo/reduction_invalid_different_operators.ll
@ -0,0 +1,49 @@
+; RUN: opt %loadPolly -basicaa -polly-scops -analyze < %s | FileCheck %s
+;
+; int f() {
+;   int i, sum = 0, sth = 0;
+;   for (i = 0; i < 1024; i++) {
+;     sum += 5;
+;     sth = sth + sth * sth + sth;
+;     sum *= 5;
+;   }
+;   return sum + sth;
+; }
+;
+; CHECK-NOT: Reduction like: 1
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define i32 @f() {
+entry:
+  %sum.0 = alloca i32
+  %sth.0 = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  store i32 0, i32* %sum.0
+  store i32 0, i32* %sth.0
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry.split
+  %i.0 = phi i32 [ 0, %entry.split ], [ %inc, %for.cond ]
+  %sth.0.reload = load i32* %sth.0
+  %sum.0.reload = load i32* %sum.0
+  %exitcond = icmp ne i32 %i.0, 1024
+  %mul = mul nsw i32 %sth.0.reload, %sth.0.reload
+  %add1 = add nsw i32 %sth.0.reload, %mul
+  %tmp = mul i32 %sum.0.reload, 5
+  store i32 %tmp, i32* %sum.0
+  %sum.1.reload = load i32* %sum.0
+  %mul3 = add i32 %sum.1.reload, 25
+  %add2 = add nsw i32 %add1, %sth.0.reload
+  %inc = add nsw i32 %i.0, 1
+  store i32 %mul3, i32* %sum.0
+  store i32 %add2, i32* %sth.0
+  br i1 %exitcond, label %for.cond, label %for.end
+
+for.end:                                          ; preds = %for.cond
+  %sum.0.reload.2 = load i32* %sum.0
+  %sth.0.reload.2 = load i32* %sth.0
+  %add4 = add nsw i32 %sum.0.reload.2, %sth.0.reload.2
+  ret i32 %add4
+}
--- a/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll
+++ b/polly/test/ScopInfo/reduction_invalid_overlapping_accesses.ll
@ -0,0 +1,58 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; void f(int *sums) {
+;   int i, j;
+;   for (i = 0; i < 1024; i++) {
+;     for (j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       sums[i+10] *= 5;
+;     }
+;   }
+; }
+;
+; CHECK-NOT: Reduction like: 1
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sums) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %i.0, 10
+  %arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4
+  %tmp2 = load i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %tmp2, 5
+  store i32 %mul, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll
+++ b/polly/test/ScopInfo/reduction_multiple_loops_array_sum.ll
@ -0,0 +1,78 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: Stmt_for_body
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Stmt_for_body3
+; CHECK: Reduction like: 0
+; CHECK: MemRef_A
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Stmt_for_end
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum
+;
+; void f(int *restrict A, int *restrict sum) {
+;   int i, j;
+;   for (i = 0; i < 100; i++) {
+;     *sum *= 7;
+;     for (j = 0; j < 100; j++) {
+;       *sum += A[i + j];
+;     }
+;     *sum *= 5;
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* noalias %A, i32* noalias %sum) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 100
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  %tmp = load i32* %sum, align 4
+  %mul = mul nsw i32 %tmp, 7
+  store i32 %mul, i32* %sum, align 4
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 100
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %add = add nsw i32 %i.0, %j.0
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp2 = load i32* %arrayidx, align 4
+  %tmp3 = load i32* %sum, align 4
+  %add4 = add nsw i32 %tmp3, %tmp2
+  store i32 %add4, i32* %sum, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  %tmp4 = load i32* %sum, align 4
+  %mul5 = mul nsw i32 %tmp4, 5
+  store i32 %mul5, i32* %sum, align 4
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll
+++ b/polly/test/ScopInfo/reduction_multiple_loops_array_sum_1.ll
@ -0,0 +1,72 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: Stmt_for_body
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_04
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_12
+; CHECK: Stmt_for_inc
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum_12
+; CHECK: Reduction like: 0
+; CHECK: MemRef_A
+; CHECK: Reduction like: 1
+; CHECK: MemRef_sum_12
+; CHECK: Stmt_for_inc5
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_12
+; CHECK: Reduction like: 0
+; CHECK: MemRef_sum_04
+;
+; int f(int * __restrict__ A) {
+;   int i, j, sum = 1;
+;   for (i = 0; i < 100; i++) {
+;     sum *= 7;
+;     for (j = 0; j < 100; j++) {
+;       sum += A[i+j];
+;     }
+;   }
+;   return sum;
+; }
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @f(i32* noalias %A) {
+entry:
+  %sum.04.reg2mem = alloca i32
+  %sum.12.reg2mem = alloca i32
+  br label %entry.split
+
+entry.split:                                      ; preds = %entry
+  store i32 0, i32* %sum.04.reg2mem
+  br label %for.body
+
+for.body:                                         ; preds = %for.inc5, %entry.split
+  %indvars.iv23 = phi i64 [ 0, %entry.split ], [ %3, %for.inc5 ]
+  %sum.04.reload = load i32* %sum.04.reg2mem
+  %mul = mul nsw i32 %sum.04.reload, 7
+  store i32 %mul, i32* %sum.12.reg2mem
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.inc, %for.body
+  %indvars.iv1 = phi i64 [ 0, %for.body ], [ %1, %for.inc ]
+  %sum.12.reload = load i32* %sum.12.reg2mem
+  %0 = add i64 %indvars.iv23, %indvars.iv1
+  %arrayidx = getelementptr i32* %A, i64 %0
+  %tmp5 = load i32* %arrayidx, align 4
+  %add4 = add nsw i32 %tmp5, %sum.12.reload
+  %1 = add nuw nsw i64 %indvars.iv1, 1
+  %exitcond1 = icmp eq i64 %1, 100
+  store i32 %add4, i32* %sum.12.reg2mem
+  br i1 %exitcond1, label %for.inc5, label %for.inc
+
+for.inc5:                                         ; preds = %for.inc
+  %2 = load i32* %sum.12.reg2mem
+  %3 = add nuw nsw i64 %indvars.iv23, 1
+  %exitcond2 = icmp eq i64 %3, 100
+  store i32 %2, i32* %sum.04.reg2mem
+  br i1 %exitcond2, label %for.end7, label %for.body
+
+for.end7:                                         ; preds = %for.inc5
+  %4 = load i32* %sum.04.reg2mem
+  ret i32 %4
+}
--- a/polly/test/ScopInfo/reduction_multiple_simple_binary.ll
+++ b/polly/test/ScopInfo/reduction_multiple_simple_binary.ll
@ -0,0 +1,98 @@
+; RUN: opt -basicaa %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[1 + i0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[i0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_first[0] };
+; CHECK: ReadAccess :=       [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_sum[0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[-1 + i0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[i0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_middle[0] };
+; CHECK: ReadAccess :=       [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 1]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_prod[0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[-1 + i0] };
+; CHECK: ReadAccess :=       [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_A[1 + i0] };
+; CHECK: MustWriteAccess :=  [Reduction like: 0]
+; CHECK:     { Stmt_for_body[i0] -> MemRef_last[0] };
+;
+; int first, sum, middle, prod, last;
+;
+; void f(int * restrict A) {
+;   int i;
+;   for (int i = 0; i < 100; i++) {
+;     first = A[i+1] + A[i];
+;     sum += i * 3;
+;     middle = A[i-1] + A[i];
+;     prod *= (i + 3);
+;     last = A[i-1] + A[i+1];
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+@first = common global i32 0, align 4
+@sum = common global i32 0, align 4
+@middle = common global i32 0, align 4
+@prod = common global i32 0, align 4
+@last = common global i32 0, align 4
+
+define void @f(i32* noalias %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i1.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i1.0, 100
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %add = add nsw i32 %i1.0, 1
+  %arrayidx = getelementptr inbounds i32* %A, i32 %add
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i1.0
+  %tmp1 = load i32* %arrayidx2, align 4
+  %add3 = add nsw i32 %tmp, %tmp1
+  store i32 %add3, i32* @first, align 4
+  %mul = mul nsw i32 %i1.0, 3
+  %tmp2 = load i32* @sum, align 4
+  %add4 = add nsw i32 %tmp2, %mul
+  store i32 %add4, i32* @sum, align 4
+  %sub = add nsw i32 %i1.0, -1
+  %arrayidx5 = getelementptr inbounds i32* %A, i32 %sub
+  %tmp3 = load i32* %arrayidx5, align 4
+  %arrayidx6 = getelementptr inbounds i32* %A, i32 %i1.0
+  %tmp4 = load i32* %arrayidx6, align 4
+  %add7 = add nsw i32 %tmp3, %tmp4
+  store i32 %add7, i32* @middle, align 4
+  %add8 = add nsw i32 %i1.0, 3
+  %tmp5 = load i32* @prod, align 4
+  %mul9 = mul nsw i32 %tmp5, %add8
+  store i32 %mul9, i32* @prod, align 4
+  %sub10 = add nsw i32 %i1.0, -1
+  %arrayidx11 = getelementptr inbounds i32* %A, i32 %sub10
+  %tmp6 = load i32* %arrayidx11, align 4
+  %add12 = add nsw i32 %i1.0, 1
+  %arrayidx13 = getelementptr inbounds i32* %A, i32 %add12
+  %tmp7 = load i32* %arrayidx13, align 4
+  %add14 = add nsw i32 %tmp6, %tmp7
+  store i32 %add14, i32* @last, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i1.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_non_overlapping_chains.ll
+++ b/polly/test/ScopInfo/reduction_non_overlapping_chains.ll
@ -0,0 +1,60 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK: Reduction like: 1
+; CHECK: Reduction like: 1
+; CHECK: Reduction like: 1
+; CHECK: Reduction like: 1
+;
+; void f(int *sums) {
+;   for (int i = 0; i < 1024; i++) {
+;     for (int j = 0; j < 1024; j++) {
+;       sums[i] += 5;
+;       sums[i+1024] *= 5;
+;     }
+;   }
+; }
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f(i32* %sums) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc6, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc7, %for.inc6 ]
+  %exitcond1 = icmp ne i32 %i.0, 1024
+  br i1 %exitcond1, label %for.body, label %for.end8
+
+for.body:                                         ; preds = %for.cond
+  br label %for.cond1
+
+for.cond1:                                        ; preds = %for.inc, %for.body
+  %j.0 = phi i32 [ 0, %for.body ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %j.0, 1024
+  br i1 %exitcond, label %for.body3, label %for.end
+
+for.body3:                                        ; preds = %for.cond1
+  %arrayidx = getelementptr inbounds i32* %sums, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, 5
+  store i32 %add, i32* %arrayidx, align 4
+  %add4 = add nsw i32 %i.0, 1024
+  %arrayidx5 = getelementptr inbounds i32* %sums, i32 %add4
+  %tmp2 = load i32* %arrayidx5, align 4
+  %mul = mul nsw i32 %tmp2, 5
+  store i32 %mul, i32* %arrayidx5, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body3
+  %inc = add nsw i32 %j.0, 1
+  br label %for.cond1
+
+for.end:                                          ; preds = %for.cond1
+  br label %for.inc6
+
+for.inc6:                                         ; preds = %for.end
+  %inc7 = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end8:                                         ; preds = %for.cond
+  ret void
+}
--- a/polly/test/ScopInfo/reduction_only_reduction_like_access.ll
+++ b/polly/test/ScopInfo/reduction_only_reduction_like_access.ll
@ -1,6 +1,6 @@
 ; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
 ;
-; CHECK: Reduction like: 0
+; CHECK: Reduction like: 1
 ;
 ; void f(int *sum) {
 ;   for (int i = 0; i < 100; i++)
--- a/polly/test/ScopInfo/reduction_two_identical_reads.ll
+++ b/polly/test/ScopInfo/reduction_two_identical_reads.ll
@ -0,0 +1,66 @@
+; RUN: opt %loadPolly -polly-scops -analyze < %s | FileCheck %s
+;
+; CHECK-NOT: Reduction like: 1
+;
+; Check that we do not mark these accesses as reduction like.
+; We do this for the case the loads are modelt with the same LLVM-IR value and
+; for the case there are different LLVM-IR values.
+;
+;    void f(int *A) {
+;      for (int i = 0; i < 1024; i++)
+;        A[i] = A[i] + A[i];
+;    }
+;
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
+
+define void @f_one_load_case(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i.0, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %add = add nsw i32 %tmp, %tmp
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}
+
+define void @f_two_loads_case(i32* %A) {
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.inc, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+  %exitcond = icmp ne i32 %i.0, 1024
+  br i1 %exitcond, label %for.body, label %for.end
+
+for.body:                                         ; preds = %for.cond
+  %arrayidx = getelementptr inbounds i32* %A, i32 %i.0
+  %tmp = load i32* %arrayidx, align 4
+  %arrayidxCopy = getelementptr inbounds i32* %A, i32 %i.0
+  %tmpCopy = load i32* %arrayidxCopy, align 4
+  %add = add nsw i32 %tmp, %tmpCopy
+  %arrayidx2 = getelementptr inbounds i32* %A, i32 %i.0
+  store i32 %add, i32* %arrayidx2, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body
+  %inc = add nsw i32 %i.0, 1
+  br label %for.cond
+
+for.end:                                          ; preds = %for.cond
+  ret void
+}