mirror of
https://github.com/RPCS3/llvm.git
synced 2025-02-21 03:00:45 +00:00
LSR Fix: check SCEV expression safety before expansion.
All SCEV expressions used by LSR formulae must be safe to expand. i.e. they may not contain UDiv unless we can prove nonzero denominator. Fixes PR11356: LSR hoists UDiv. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@160205 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
31f61e8b22
commit
e08c32249f
@ -24,6 +24,10 @@
|
||||
namespace llvm {
|
||||
class TargetLowering;
|
||||
|
||||
/// Return true if the given expression is safe to expand in the sense that
|
||||
/// all materialized values are safe to speculate.
|
||||
bool isSafeToExpand(const SCEV *S);
|
||||
|
||||
/// SCEVExpander - This class uses information about analyze scalars to
|
||||
/// rewrite expressions in canonical form.
|
||||
///
|
||||
|
@ -1700,3 +1700,44 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
|
||||
}
|
||||
return NumElim;
|
||||
}
|
||||
|
||||
namespace {
|
||||
// Search for a SCEV subexpression that is not safe to expand. Any expression
|
||||
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
|
||||
// UDiv expressions. We don't know if the UDiv is derived from an IR divide
|
||||
// instruction, but the important thing is that we prove the denominator is
|
||||
// nonzero before expansion.
|
||||
//
|
||||
// IVUsers already checks that IV-derived expressions are safe. So this check is
|
||||
// only needed when the expression includes some subexpression that is not IV
|
||||
// derived.
|
||||
//
|
||||
// Currently, we only allow division by a nonzero constant here. If this is
|
||||
// inadequate, we could easily allow division by SCEVUnknown by using
|
||||
// ValueTracking to check isKnownNonZero().
|
||||
struct SCEVFindUnsafe {
|
||||
bool IsUnsafe;
|
||||
|
||||
SCEVFindUnsafe(): IsUnsafe(false) {}
|
||||
|
||||
bool follow(const SCEV *S) {
|
||||
const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S);
|
||||
if (!D)
|
||||
return true;
|
||||
const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
|
||||
if (SC && !SC->getValue()->isZero())
|
||||
return true;
|
||||
IsUnsafe = true;
|
||||
return false;
|
||||
}
|
||||
bool isDone() const { return IsUnsafe; }
|
||||
};
|
||||
}
|
||||
|
||||
namespace llvm {
|
||||
bool isSafeToExpand(const SCEV *S) {
|
||||
SCEVFindUnsafe Search;
|
||||
visitAll(S, Search);
|
||||
return !Search.IsUnsafe;
|
||||
}
|
||||
}
|
||||
|
@ -2836,7 +2836,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
|
||||
|
||||
// x == y --> x - y == 0
|
||||
const SCEV *N = SE.getSCEV(NV);
|
||||
if (SE.isLoopInvariant(N, L)) {
|
||||
if (SE.isLoopInvariant(N, L) && isSafeToExpand(N)) {
|
||||
// S is normalized, so normalize N before folding it into S
|
||||
// to keep the result normalized.
|
||||
N = TransformForPostIncUse(Normalize, N, CI, 0,
|
||||
|
90
test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
Normal file
90
test/Transforms/LoopStrengthReduce/2012-07-13-ExpandUDiv.ll
Normal file
@ -0,0 +1,90 @@
|
||||
; RUN: opt -loop-reduce -S < %s | FileCheck %s
|
||||
;
|
||||
; PR11356: likely wrong code bug
|
||||
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-darwin"
|
||||
|
||||
@g_66 = global [1 x i32] zeroinitializer, align 4
|
||||
@g_775 = global i32 0, align 4
|
||||
@g_752 = global i32 0, align 4
|
||||
@g_3 = global i32 0, align 4
|
||||
|
||||
; Ensure that %div.i.i.us is not hoisted.
|
||||
; CHECK: @main
|
||||
; CHECK: for.body.i.i.us:
|
||||
; CHECK: %div.i.i.i.us
|
||||
; CHECK: %cmp5.i.i.us
|
||||
define i32 @main() nounwind uwtable ssp {
|
||||
entry:
|
||||
%l_2 = alloca [1 x i32], align 4
|
||||
%arrayidx = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 0
|
||||
store i32 0, i32* %arrayidx, align 4, !tbaa !0
|
||||
%tmp = load i32* @g_3, align 4, !tbaa !0
|
||||
%idxprom = sext i32 %tmp to i64
|
||||
%arrayidx1 = getelementptr inbounds [1 x i32]* %l_2, i64 0, i64 %idxprom
|
||||
%tmp1 = load i32* %arrayidx1, align 4, !tbaa !0
|
||||
%conv.i.i = and i32 %tmp1, 65535
|
||||
%tobool.i.i.i = icmp ne i32 %tmp, 0
|
||||
br label %codeRepl
|
||||
|
||||
codeRepl.loopexit.us-lcssa: ; preds = %for.body.i.i, %codeRepl5
|
||||
br label %codeRepl.loopexit
|
||||
|
||||
codeRepl.loopexit: ; preds = %codeRepl.loopexit.us-lcssa.us, %codeRepl.loopexit.us-lcssa
|
||||
br label %codeRepl
|
||||
|
||||
codeRepl: ; preds = %codeRepl.loopexit, %entry
|
||||
br i1 %tobool.i.i.i, label %codeRepl.split.us, label %codeRepl.codeRepl.split_crit_edge
|
||||
|
||||
codeRepl.codeRepl.split_crit_edge: ; preds = %codeRepl
|
||||
br label %codeRepl.split
|
||||
|
||||
codeRepl.split.us: ; preds = %codeRepl
|
||||
br label %for.cond.i.i.us
|
||||
|
||||
for.cond.i.i.us: ; preds = %for.inc.i.i.us, %codeRepl.split.us
|
||||
%tmp2 = phi i32 [ 0, %codeRepl.split.us ], [ %add.i.i.us, %for.inc.i.i.us ]
|
||||
br label %codeRepl5.us
|
||||
|
||||
for.inc.i.i.us: ; preds = %for.body.i.i.us
|
||||
%add.i.i.us = add nsw i32 %tmp2, 1
|
||||
store i32 %add.i.i.us, i32* @g_752, align 4, !tbaa !0
|
||||
br label %for.cond.i.i.us
|
||||
|
||||
for.body.i.i.us: ; preds = %codeRepl5.us
|
||||
%div.i.i.i.us = udiv i32 1, %conv.i.i
|
||||
%cmp5.i.i.us = icmp eq i32 %div.i.i.i.us, %tmp2
|
||||
br i1 %cmp5.i.i.us, label %codeRepl.loopexit.us-lcssa.us, label %for.inc.i.i.us
|
||||
|
||||
codeRepl5.us: ; preds = %for.cond.i.i.us
|
||||
br i1 true, label %codeRepl.loopexit.us-lcssa.us, label %for.body.i.i.us
|
||||
|
||||
codeRepl.loopexit.us-lcssa.us: ; preds = %codeRepl5.us, %for.body.i.i.us
|
||||
br label %codeRepl.loopexit
|
||||
|
||||
codeRepl.split: ; preds = %codeRepl.codeRepl.split_crit_edge
|
||||
br label %for.cond.i.i
|
||||
|
||||
for.cond.i.i: ; preds = %for.inc.i.i, %codeRepl.split
|
||||
%tmp3 = phi i32 [ 0, %codeRepl.split ], [ %add.i.i, %for.inc.i.i ]
|
||||
br label %codeRepl5
|
||||
|
||||
codeRepl5: ; preds = %for.cond.i.i
|
||||
br i1 true, label %codeRepl.loopexit.us-lcssa, label %for.body.i.i
|
||||
|
||||
for.body.i.i: ; preds = %codeRepl5
|
||||
%cmp5.i.i = icmp eq i32 0, %tmp3
|
||||
br i1 %cmp5.i.i, label %codeRepl.loopexit.us-lcssa, label %for.inc.i.i
|
||||
|
||||
for.inc.i.i: ; preds = %for.body.i.i
|
||||
%add.i.i = add nsw i32 %tmp3, 1
|
||||
store i32 %add.i.i, i32* @g_752, align 4, !tbaa !0
|
||||
br label %for.cond.i.i
|
||||
|
||||
func_4.exit: ; No predecessors!
|
||||
ret i32 0
|
||||
}
|
||||
|
||||
!0 = metadata !{metadata !"int", metadata !1}
|
||||
!1 = metadata !{metadata !"omnipotent char", metadata !2}
|
||||
!2 = metadata !{metadata !"Simple C/C++ TBAA", null}
|
Loading…
x
Reference in New Issue
Block a user