LSR Fix: check SCEV expression safety before expansion.

All SCEV expressions used by LSR formulae must be safe to
expand. i.e. they may not contain UDiv unless we can prove nonzero
denominator.

Fixes PR11356: LSR hoists UDiv.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160205 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Andrew Trick 2012-07-13 23:33:10 +00:00
parent 31f61e8b22
commit e08c32249f
4 changed files with 136 additions and 1 deletions

View File

@ -24,6 +24,10 @@
namespace llvm {
class TargetLowering;
/// Return true if the given expression is safe to expand in the sense that
/// all materialized values are safe to speculate.
bool isSafeToExpand(const SCEV *S);
/// SCEVExpander - This class uses information about analyze scalars to
/// rewrite expressions in canonical form.
///

View File

@ -1700,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
}
return NumElim;
}
namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
// UDiv expressions. We don't know if the UDiv is derived from an IR divide
// instruction, but the important thing is that we prove the denominator is
// nonzero before expansion.
//
// IVUsers already checks that IV-derived expressions are safe. So this check is
// only needed when the expression includes some subexpression that is not IV
// derived.
//
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
struct SCEVFindUnsafe {
bool IsUnsafe;
SCEVFindUnsafe(): IsUnsafe(false) {}
bool follow(const SCEV *S) {
const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
if (!D)
return true;
const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
if (SC && !SC->getValue()->isZero())
return true;
IsUnsafe = true;
return false;
}
bool isDone() const { return IsUnsafe; }
};
}
namespace llvm {
bool isSafeToExpand(const SCEV *S) {
SCEVFindUnsafe Search;
visitAll(S, Search);
return !Search.IsUnsafe;
}
}

View File

@ -2836,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// x == y --> x - y == 0
const SCEV *N = SE.getSCEV(NV);
if (SE.isLoopInvariant(N, L)) {
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
// S is normalized, so normalize N before folding it into S
// to keep the result normalized.
N = TransformForPostIncUse(Normalize, N, CI, 0,

View File

@ -0,0 +1,90 @@
; RUN: opt -loop-reduce -S < %s | FileCheck %s
;
; PR11356: likely wrong code bug
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin"
@g_66 = global [1 x i32] zeroinitializer, align 4
@g_775 = global i32 0, align 4
@g_752 = global i32 0, align 4
@g_3 = global i32 0, align 4
; Ensure that %div.i.i.us is not hoisted.
; CHECK: @main
; CHECK: for.body.i.i.us:
; CHECK: %div.i.i.i.us
; CHECK: %cmp5.i.i.us
define i32 @main() nounwind uwtable ssp {
entry:
%l_2 = alloca [1 x i32], align 4
%arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0
store i32 0, i32* %arrayidx, align 4, !tbaa !0
%tmp = load i32* @g_3, align 4, !tbaa !0
%idxprom = sext i32 %tmp to i64
%arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom
%tmp1 = load i32* %arrayidx1, align 4, !tbaa !0
%conv.i.i = and i32 %tmp1, 65535
%tobool.i.i.i = icmp ne i32 %tmp, 0
br label %codeRepl
codeRepl.loopexit.us-lcssa: ; preds = %for.body.i.i, %codeRepl5
br label %codeRepl.loopexit
codeRepl.loopexit: ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa
br label %codeRepl
codeRepl: ; preds = %codeRepl.loopexit, %entry
br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge
codeRepl.codeRepl.split_crit_edge: ; preds = %codeRepl
br label %codeRepl.split
codeRepl.split.us: ; preds = %codeRepl
br label %for.cond.i.i.us
for.cond.i.i.us: ; preds = %for.inc.i.i.us, %codeRepl.split.us
%tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ]
br label %codeRepl5.us
for.inc.i.i.us: ; preds = %for.body.i.i.us
%add.i.i.us = add nsw i32 %tmp2, 1
store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0
br label %for.cond.i.i.us
for.body.i.i.us: ; preds = %codeRepl5.us
%div.i.i.i.us = udiv i32 1, %conv.i.i
%cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2
br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us
codeRepl5.us: ; preds = %for.cond.i.i.us
br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us
codeRepl.loopexit.us-lcssa.us: ; preds = %codeRepl5.us, %for.body.i.i.us
br label %codeRepl.loopexit
codeRepl.split: ; preds = %codeRepl.codeRepl.split_crit_edge
br label %for.cond.i.i
for.cond.i.i: ; preds = %for.inc.i.i, %codeRepl.split
%tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ]
br label %codeRepl5
codeRepl5: ; preds = %for.cond.i.i
br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i
for.body.i.i: ; preds = %codeRepl5
%cmp5.i.i = icmp eq i32 0, %tmp3
br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i
for.inc.i.i: ; preds = %for.body.i.i
%add.i.i = add nsw i32 %tmp3, 1
store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0
br label %for.cond.i.i
func_4.exit: ; No predecessors!
ret i32 0
}
!0 = metadata !{metadata !"int", metadata !1}
!1 = metadata !{metadata !"omnipotent char", metadata !2}
!2 = metadata !{metadata !"Simple C/C++ TBAA", null}