Fix a scalability issue with complex ConstantExprs.

This is basically the same fix in three different places. We use a set to avoid
walking the whole tree of a big ConstantExprs multiple times.

For example: (select cmp, (add big_expr 1), (add big_expr 2))
We don't want to visit big_expr twice here, it may consist of thousands of
nodes.

The testcase exercises this by creating an insanely large ConstantExprs out of
a loop. It's questionable if the optimizer should ever create those, but this
can be triggered with real C code. Fixes PR15714.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Benjamin Kramer 2013-04-13 12:53:18 +00:00
parent b99c995825
commit 8848680ce0
4 changed files with 149 additions and 25 deletions

View File

@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/Analysis/ValueTracking.h"
@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
/// result is returned, if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
const DataLayout *TD,
const TargetLibraryInfo *TLI) {
SmallVector<Constant*, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
i != e; ++i) {
static Constant *
ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
const TargetLibraryInfo *TLI,
SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
SmallVector<Constant *, 8> Ops;
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
++i) {
Constant *NewC = cast<Constant>(*i);
// Recursively fold the ConstantExpr's operands.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
// Recursively fold the ConstantExpr's operands. If we have already folded
// a ConstantExpr, we don't have to process it again.
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
if (FoldedOps.insert(NewCE))
NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
}
Ops.push_back(NewC);
}
@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified DataLayout. If successful, the constant result is
/// result is returned, if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
const DataLayout *TD,
const TargetLibraryInfo *TLI) {
SmallPtrSet<ConstantExpr *, 4> FoldedOps;
return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
/// specified opcode and operands. If successful, the constant result is
/// returned, if not, null is returned. Note that this function can fail when

View File

@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() {
delete this;
}
/// canTrap - Return true if evaluation of this constant could trap. This is
/// true for things like constant expressions that could divide by zero.
bool Constant::canTrap() const {
assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
static bool canTrapImpl(const Constant *C,
SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) {
assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
// The only thing that could possibly trap are constant exprs.
const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
if (!CE) return false;
const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
if (!CE)
return false;
// ConstantExpr traps if any operands can trap.
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
if (CE->getOperand(i)->canTrap())
return true;
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps))
return true;
}
}
// Otherwise, only specific operations can trap.
switch (CE->getOpcode()) {
@ -267,6 +270,13 @@ bool Constant::canTrap() const {
}
}
/// canTrap - Return true if evaluation of this constant could trap. This is
/// true for things like constant expressions that could divide by zero.
bool Constant::canTrap() const {
SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
return canTrapImpl(this, NonTrappingOps);
}
/// isThreadDependent - Return true if the value can vary between threads.
bool Constant::isThreadDependent() const {
SmallPtrSet<const Constant*, 64> Visited;

View File

@ -42,6 +42,7 @@ namespace {
private:
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
SmallPtrSet<Constant *, 8> SeenConstants;
/// GlobalIsNeeded - mark the specific global value as needed, and
/// recursively mark anything that it uses as also needed.
@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
// Make sure that all memory is released
AliveGlobals.clear();
SeenConstants.clear();
return Changed;
}
@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
return GlobalIsNeeded(GV);
// Loop over all of the operands of the constant, adding any globals they
// use to the list of needed globals.
for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
if (Constant *OpC = dyn_cast<Constant>(*I))
MarkUsedGlobalsAsNeeded(OpC);
for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
// If we've already processed this constant there's no need to do it again.
Constant *Op = cast<Constant>(*I);
if (SeenConstants.insert(Op))
MarkUsedGlobalsAsNeeded(Op);
}
}
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified

View File

@ -0,0 +1,97 @@
; RUN: opt -O2 -disable-output < %s
; PR15714
%struct.ham = type { i32 }
@global5 = common global i32 0, align 4
@global6 = common global i32 0, align 4
@global7 = common global i32 0, align 4
@global = common global i32 0, align 4
@global8 = common global %struct.ham zeroinitializer, align 4
@global9 = common global i32 0, align 4
@global10 = common global i32 0, align 4
@global11 = common global i32 0, align 4
define void @zot12() {
bb:
store i32 0, i32* @global5, align 4
store i32 0, i32* @global6, align 4
br label %bb2
bb1: ; preds = %bb11
%tmp = load i32* @global5, align 4
br label %bb2
bb2: ; preds = %bb1, %bb
%tmp3 = phi i32 [ %tmp, %bb1 ], [ 0, %bb ]
%tmp4 = xor i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
store i32 %tmp4, i32* @global5, align 4
%tmp5 = icmp eq i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
br i1 %tmp5, label %bb8, label %bb6
bb6: ; preds = %bb2
%tmp7 = tail call i32 @quux13()
br label %bb8
bb8: ; preds = %bb6, %bb2
%tmp9 = load i32* @global7, align 4
%tmp10 = icmp eq i32 %tmp9, 0
br i1 %tmp10, label %bb11, label %bb15
bb11: ; preds = %bb8
%tmp12 = load i32* @global6, align 4
%tmp13 = add nsw i32 %tmp12, 1
store i32 %tmp13, i32* @global6, align 4
%tmp14 = icmp slt i32 %tmp13, 42
br i1 %tmp14, label %bb1, label %bb15
bb15: ; preds = %bb11, %bb8
ret void
}
define i32 @quux13() {
bb:
store i32 1, i32* @global5, align 4
ret i32 1
}
define void @wombat() {
bb:
tail call void @zot12()
ret void
}
define void @wombat14() {
bb:
tail call void @blam()
ret void
}
define void @blam() {
bb:
store i32 ptrtoint (i32* @global to i32), i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
store i32 0, i32* @global9, align 4
%tmp = load i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
br label %bb1
bb1: ; preds = %bb1, %bb
%tmp2 = phi i32 [ 0, %bb ], [ %tmp11, %bb1 ]
%tmp3 = phi i32 [ %tmp, %bb ], [ %tmp10, %bb1 ]
%tmp4 = icmp sgt i32 %tmp3, 0
%tmp5 = zext i1 %tmp4 to i32
%tmp6 = urem i32 %tmp5, 5
%tmp7 = mul i32 %tmp3, -80
%tmp8 = or i32 %tmp7, %tmp6
%tmp9 = icmp eq i32 %tmp8, 0
%tmp10 = zext i1 %tmp9 to i32
%tmp11 = add nsw i32 %tmp2, 1
%tmp12 = icmp eq i32 %tmp11, 20
br i1 %tmp12, label %bb13, label %bb1
bb13: ; preds = %bb1
store i32 %tmp10, i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
store i32 0, i32* @global10, align 4
store i32 %tmp6, i32* @global11, align 4
store i32 20, i32* @global9, align 4
ret void
}