mirror of
https://github.com/capstone-engine/llvm-capstone.git
synced 2025-01-30 05:36:44 +00:00
[SCEVAffinator] Make precise modular math more correct.
Integer math in LLVM IR is modular. Integer math in isl is arbitrary-precision. Modeling LLVM IR math correctly in isl requires either adding assumptions that math doesn't actually overflow, or explicitly wrapping the math. However, expressions with the "nsw" flag are special; we can pretend they're arbitrary-precision because it's undefined behavior if the result wraps. SCEV expressions based on IR instructions with an nsw flag also carry an nsw flag (roughly; actually, the real rule is a bit more complicated, but the details don't matter here). Before this patch, SCEV flags were also overloaded with an additional function: the ZExt code was mutating SCEV expressions as a hack to indicate to checkForWrapping that we don't need to add assumptions to the operand of a ZExt; it'll add explicit wrapping itself. This kind of works... the problem is that if anything else ever touches that SCEV expression, it'll get confused by the incorrect flags. Instead, with this patch, we make the decision about whether to explicitly wrap the math a bit earlier, basing the decision purely on the SCEV expression itself, and not its users. Differential Revision: https://reviews.llvm.org/D25287 llvm-svn: 284848
This commit is contained in:
parent
5a75a0d8f5
commit
286c5a76ba
@ -113,6 +113,10 @@ private:
|
||||
/// @returns The isl representation @p PWAC with a possibly adjusted domain.
|
||||
__isl_give PWACtx checkForWrapping(const llvm::SCEV *Expr, PWACtx PWAC) const;
|
||||
|
||||
/// Whether to track the value of this expression precisely, rather than
|
||||
/// assuming it won't wrap.
|
||||
bool computeModuloForExpr(const llvm::SCEV *Expr);
|
||||
|
||||
__isl_give PWACtx visit(const llvm::SCEV *E);
|
||||
__isl_give PWACtx visitConstant(const llvm::SCEVConstant *E);
|
||||
__isl_give PWACtx visitTruncateExpr(const llvm::SCEVTruncateExpr *E);
|
||||
|
@ -36,21 +36,9 @@ static cl::opt<bool> IgnoreIntegerWrapping(
|
||||
// compile time.
|
||||
static int const MaxDisjunctionsInPwAff = 100;
|
||||
|
||||
// The maximal number of bits for which a zero-extend is modeled precisely.
|
||||
static unsigned const MaxZextSmallBitWidth = 7;
|
||||
|
||||
// The maximal number of bits for which a truncate is modeled precisely.
|
||||
static unsigned const MaxTruncateSmallBitWidth = 31;
|
||||
|
||||
/// Return true if a zero-extend from @p Width bits is precisely modeled.
|
||||
static bool isPreciseZeroExtend(unsigned Width) {
|
||||
return Width <= MaxZextSmallBitWidth;
|
||||
}
|
||||
|
||||
/// Return true if a truncate from @p Width bits is precisely modeled.
|
||||
static bool isPreciseTruncate(unsigned Width) {
|
||||
return Width <= MaxTruncateSmallBitWidth;
|
||||
}
|
||||
// The maximal number of bits for which a general expression is modeled
|
||||
// precisely.
|
||||
static unsigned const MaxSmallBitWidth = 7;
|
||||
|
||||
/// Add the number of basic sets in @p Domain to @p User
|
||||
static isl_stat addNumBasicSets(__isl_take isl_set *Domain,
|
||||
@ -99,26 +87,6 @@ static void combine(__isl_keep PWACtx &PWAC0, const __isl_take PWACtx &PWAC1,
|
||||
PWAC0.second = isl_set_union(PWAC0.second, PWAC1.second);
|
||||
}
|
||||
|
||||
/// Set the possible wrapping of @p Expr to @p Flags.
|
||||
static const SCEV *setNoWrapFlags(ScalarEvolution &SE, const SCEV *Expr,
|
||||
SCEV::NoWrapFlags Flags) {
|
||||
auto *NAry = dyn_cast<SCEVNAryExpr>(Expr);
|
||||
if (!NAry)
|
||||
return Expr;
|
||||
|
||||
SmallVector<const SCEV *, 8> Ops(NAry->op_begin(), NAry->op_end());
|
||||
switch (Expr->getSCEVType()) {
|
||||
case scAddExpr:
|
||||
return SE.getAddExpr(Ops, Flags);
|
||||
case scMulExpr:
|
||||
return SE.getMulExpr(Ops, Flags);
|
||||
case scAddRecExpr:
|
||||
return SE.getAddRecExpr(Ops, cast<SCEVAddRecExpr>(Expr)->getLoop(), Flags);
|
||||
default:
|
||||
return Expr;
|
||||
}
|
||||
}
|
||||
|
||||
static __isl_give isl_pw_aff *getWidthExpValOnDomain(unsigned Width,
|
||||
__isl_take isl_set *Dom) {
|
||||
auto *Ctx = isl_set_get_ctx(Dom);
|
||||
@ -241,6 +209,15 @@ bool SCEVAffinator::hasNSWAddRecForLoop(Loop *L) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SCEVAffinator::computeModuloForExpr(const SCEV *Expr) {
|
||||
unsigned Width = TD.getTypeSizeInBits(Expr->getType());
|
||||
// We assume nsw expressions never overflow.
|
||||
if (auto *NAry = dyn_cast<SCEVNAryExpr>(Expr))
|
||||
if (NAry->getNoWrapFlags() & SCEV::FlagNSW)
|
||||
return false;
|
||||
return Width <= MaxSmallBitWidth;
|
||||
}
|
||||
|
||||
__isl_give PWACtx SCEVAffinator::visit(const SCEV *Expr) {
|
||||
|
||||
auto Key = std::make_pair(Expr, BB);
|
||||
@ -267,16 +244,23 @@ __isl_give PWACtx SCEVAffinator::visit(const SCEV *Expr) {
|
||||
PWAC = getPWACtxFromPWA(isl_pw_aff_alloc(Domain, Affine));
|
||||
} else {
|
||||
PWAC = SCEVVisitor<SCEVAffinator, PWACtx>::visit(Expr);
|
||||
PWAC = checkForWrapping(Expr, PWAC);
|
||||
if (computeModuloForExpr(Expr))
|
||||
PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
|
||||
else
|
||||
PWAC = checkForWrapping(Expr, PWAC);
|
||||
}
|
||||
|
||||
if (!Factor->getType()->isIntegerTy(1))
|
||||
if (!Factor->getType()->isIntegerTy(1)) {
|
||||
combine(PWAC, visitConstant(Factor), isl_pw_aff_mul);
|
||||
if (computeModuloForExpr(Key.first))
|
||||
PWAC.first = addModuloSemantic(PWAC.first, Expr->getType());
|
||||
}
|
||||
|
||||
// For compile time reasons we need to simplify the PWAC before we cache and
|
||||
// return it.
|
||||
PWAC.first = isl_pw_aff_coalesce(PWAC.first);
|
||||
PWAC = checkForWrapping(Key.first, PWAC);
|
||||
if (!computeModuloForExpr(Key.first))
|
||||
PWAC = checkForWrapping(Key.first, PWAC);
|
||||
|
||||
CachedExpressions[Key] = copyPWACtx(PWAC);
|
||||
return PWAC;
|
||||
@ -314,12 +298,9 @@ SCEVAffinator::visitTruncateExpr(const SCEVTruncateExpr *Expr) {
|
||||
auto OpPWAC = visit(Op);
|
||||
|
||||
unsigned Width = TD.getTypeSizeInBits(Expr->getType());
|
||||
bool Precise = isPreciseTruncate(Width);
|
||||
|
||||
if (Precise) {
|
||||
OpPWAC.first = addModuloSemantic(OpPWAC.first, Expr->getType());
|
||||
if (computeModuloForExpr(Expr))
|
||||
return OpPWAC;
|
||||
}
|
||||
|
||||
auto *Dom = isl_pw_aff_domain(isl_pw_aff_copy(OpPWAC.first));
|
||||
auto *ExpPWA = getWidthExpValOnDomain(Width - 1, Dom);
|
||||
@ -380,28 +361,17 @@ SCEVAffinator::visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) {
|
||||
// assumptions and assume the "former negative" piece will not exist.
|
||||
|
||||
auto *Op = Expr->getOperand();
|
||||
unsigned Width = TD.getTypeSizeInBits(Op->getType());
|
||||
|
||||
bool Precise = isPreciseZeroExtend(Width);
|
||||
|
||||
auto Flags = getNoWrapFlags(Op);
|
||||
auto NoWrapFlags = ScalarEvolution::setFlags(Flags, SCEV::FlagNSW);
|
||||
bool OpCanWrap = Precise && !(Flags & SCEV::FlagNSW);
|
||||
if (OpCanWrap)
|
||||
Op = setNoWrapFlags(SE, Op, NoWrapFlags);
|
||||
|
||||
auto OpPWAC = visit(Op);
|
||||
if (OpCanWrap)
|
||||
OpPWAC.first = addModuloSemantic(OpPWAC.first, Op->getType());
|
||||
|
||||
// If the width is too big we assume the negative part does not occur.
|
||||
if (!Precise) {
|
||||
if (!computeModuloForExpr(Op)) {
|
||||
takeNonNegativeAssumption(OpPWAC);
|
||||
return OpPWAC;
|
||||
}
|
||||
|
||||
// If the width is small build the piece for the non-negative part and
|
||||
// the one for the negative part and unify them.
|
||||
unsigned Width = TD.getTypeSizeInBits(Op->getType());
|
||||
interpretAsUnsigned(OpPWAC, Width);
|
||||
return OpPWAC;
|
||||
}
|
||||
|
@ -4,24 +4,47 @@
|
||||
; RUN: opt %loadPolly -polly-scops -analyze < %s \
|
||||
; RUN: | FileCheck %s -check-prefix=SCOPS
|
||||
|
||||
; DETECT: Valid Region for Scop: header => exit
|
||||
; SCOPS-NOT: Region: %header---%exit
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
|
||||
; DETECT: Valid Region for Scop: test1.header => test1.exit
|
||||
; SCOPS-NOT: Region: %test1.header---%test1.exit
|
||||
|
||||
; Verify that we detect this scop, but that, due to an infeasible run-time
|
||||
; check, we refuse to model it.
|
||||
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
define void @test(i64* %a) nounwind uwtable {
|
||||
preheader:
|
||||
br label %test1.header
|
||||
|
||||
@A = common global [128 x i32] zeroinitializer, align 16
|
||||
test1.header:
|
||||
%i = phi i56 [ 0, %preheader ], [ %i.1, %test1.header ]
|
||||
%tmp = zext i56 %i to i64
|
||||
%A.addr = getelementptr i64, i64* %a, i64 %tmp
|
||||
%A.load = load i64, i64* %A.addr, align 4
|
||||
%A.inc = zext i56 %i to i64
|
||||
%A.val = add nsw i64 %A.load, %A.inc
|
||||
store i64 %A.val, i64* %A.addr, align 4
|
||||
%i.1 = add i56 %i, 1
|
||||
%exitcond = icmp eq i56 %i.1, 0
|
||||
br i1 %exitcond, label %test1.exit, label %test1.header
|
||||
|
||||
define void @test() nounwind uwtable {
|
||||
test1.exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
; Old version of the previous test; make sure we compute the trip count
|
||||
; correctly.
|
||||
|
||||
; SCOPS: { Stmt_header[i0] : 0 <= i0 <= 127 };
|
||||
|
||||
define void @test2([128 x i32]* %a) nounwind uwtable {
|
||||
preheader:
|
||||
br label %header
|
||||
|
||||
header:
|
||||
%i = phi i7 [ 0, %preheader ], [ %i.1, %header ]
|
||||
%tmp = zext i7 %i to i64
|
||||
%A.addr = getelementptr [128 x i32], [128 x i32]* @A, i64 0, i64 %tmp
|
||||
%A.addr = getelementptr [128 x i32], [128 x i32]* %a, i64 0, i64 %tmp
|
||||
%A.load = load i32, i32* %A.addr, align 4
|
||||
%A.inc = zext i7 %i to i32
|
||||
%A.val = add nsw i32 %A.load, %A.inc
|
||||
|
@ -111,8 +111,24 @@ bb:
|
||||
store i3 %indvar, i3* %scevgep, align 8
|
||||
%indvar.next = add nsw i3 %indvar, 1
|
||||
%sub = sub i3 %n, 3
|
||||
; CHECK: 'bb => return' in function 'f6'
|
||||
; CHECK: [n] -> { Stmt_bb[0] : n = 3 };
|
||||
; CHECK-LABEL: 'bb => return' in function 'f6'
|
||||
; CHECK: Context:
|
||||
; CHECK-NEXT: [n] -> { : -4 <= n <= 3 }
|
||||
; CHECK-NEXT: Assumed Context:
|
||||
; CHECK-NEXT: [n] -> { : }
|
||||
; CHECK-NEXT: Invalid Context:
|
||||
; CHECK-NEXT: [n] -> { : 1 = 0 }
|
||||
|
||||
; CHECK: Statements {
|
||||
; CHECK-NEXT: Stmt_bb
|
||||
; CHECK-NEXT: Domain :=
|
||||
; CHECK-NEXT: [n] -> { Stmt_bb[i0] : i0 >= 0 and 8*floor((2 - n)/8) >= -5 - n + i0 and 8*floor((2 - n)/8) <= -2 - n };
|
||||
; CHECK-NEXT: Schedule :=
|
||||
; CHECK-NEXT: [n] -> { Stmt_bb[i0] -> [i0] };
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: [n] -> { Stmt_bb[i0] -> MemRef_a[i0] };
|
||||
; CHECK-NEXT:}
|
||||
|
||||
%exitcond = icmp eq i3 %indvar, %sub
|
||||
br i1 %exitcond, label %return, label %bb
|
||||
|
||||
|
@ -5,13 +5,14 @@
|
||||
; A[i]++;
|
||||
; }
|
||||
;
|
||||
; FIXME: We should model the truncate precisely... or just make it a separate parameter.
|
||||
; CHECK: Assumed Context:
|
||||
; CHECK-NEXT: [N] -> { : }
|
||||
; CHECK-NEXT: Invalid Context:
|
||||
; CHECK-NEXT: [N] -> { : 1 = 0 }
|
||||
; CHECK-NEXT: [N] -> { : N <= -129 or N >= 128 }
|
||||
;
|
||||
; CHECK: Domain :=
|
||||
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : i0 >= 0 and 256*floor((128 + N)/256) < N - i0 };
|
||||
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] : 0 <= i0 < N };
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
@ -5,15 +5,16 @@
|
||||
; A[(char)(N)]++;
|
||||
; }
|
||||
;
|
||||
; FIXME: We should model the truncate precisely... or just make it a separate parameter.
|
||||
; CHECK: Assumed Context:
|
||||
; CHECK-NEXT: [N] -> { : }
|
||||
; CHECK-NEXT: Invalid Context:
|
||||
; CHECK-NEXT: [N] -> { : 1 = 0 }
|
||||
; CHECK-NEXT: [N] -> { : N >= 128 }
|
||||
;
|
||||
; CHECK: ReadAccess := [Reduction Type: +] [Scalar: 0]
|
||||
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
|
||||
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[N] };
|
||||
; CHECK-NEXT: MustWriteAccess := [Reduction Type: +] [Scalar: 0]
|
||||
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[o0] : 256*floor((-N + o0)/256) = -N + o0 and -128 <= o0 <= 127 };
|
||||
; CHECK-NEXT: [N] -> { Stmt_for_body[i0] -> MemRef_A[N] };
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
@ -9,13 +9,14 @@
|
||||
; }
|
||||
; }
|
||||
;
|
||||
; FIXME: The truncated value should be a parameter.
|
||||
; CHECK: Assumed Context:
|
||||
; CHECK-NEXT: [N, tmp, M] -> { : }
|
||||
; CHECK-NEXT: Invalid Context:
|
||||
; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and M < 0) or (N > 0 and 256*floor((128 + tmp)/256) > tmp) }
|
||||
; CHECK-NEXT: [N, tmp, M] -> { : N < 0 or (N > 0 and tmp >= 128) or (N > 0 and tmp < 0) or (N > 0 and M < 0) }
|
||||
;
|
||||
; CHECK: Domain :=
|
||||
; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : 0 <= i0 < N and 256*floor((128 + tmp)/256) > tmp - M };
|
||||
; CHECK-NEXT: [N, tmp, M] -> { Stmt_if_then[i0] : M > tmp and 0 <= i0 < N };
|
||||
;
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user