mirror of
https://github.com/RPCSX/llvm.git
synced 2025-01-23 20:57:21 +00:00
Fix a scalability issue with complex ConstantExprs.
This is basically the same fix in three different places. We use a set to avoid walking the whole tree of a big ConstantExprs multiple times. For example: (select cmp, (add big_expr 1), (add big_expr 2)) We don't want to visit big_expr twice here, it may consist of thousands of nodes. The testcase exercises this by creating an insanely large ConstantExprs out of a loop. It's questionable if the optimizer should ever create those, but this can be triggered with real C code. Fixes PR15714. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179458 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
b99c995825
commit
8848680ce0
@ -17,6 +17,7 @@
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#include "llvm/Analysis/ConstantFolding.h"
|
||||
#include "llvm/ADT/SmallPtrSet.h"
|
||||
#include "llvm/ADT/SmallVector.h"
|
||||
#include "llvm/ADT/StringMap.h"
|
||||
#include "llvm/Analysis/ValueTracking.h"
|
||||
@ -880,19 +881,20 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I,
|
||||
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI);
|
||||
}
|
||||
|
||||
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
|
||||
/// using the specified DataLayout. If successful, the constant result is
|
||||
/// result is returned, if not, null is returned.
|
||||
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
|
||||
const DataLayout *TD,
|
||||
const TargetLibraryInfo *TLI) {
|
||||
SmallVector<Constant*, 8> Ops;
|
||||
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end();
|
||||
i != e; ++i) {
|
||||
static Constant *
|
||||
ConstantFoldConstantExpressionImpl(const ConstantExpr *CE, const DataLayout *TD,
|
||||
const TargetLibraryInfo *TLI,
|
||||
SmallPtrSet<ConstantExpr *, 4> &FoldedOps) {
|
||||
SmallVector<Constant *, 8> Ops;
|
||||
for (User::const_op_iterator i = CE->op_begin(), e = CE->op_end(); i != e;
|
||||
++i) {
|
||||
Constant *NewC = cast<Constant>(*i);
|
||||
// Recursively fold the ConstantExpr's operands.
|
||||
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC))
|
||||
NewC = ConstantFoldConstantExpression(NewCE, TD, TLI);
|
||||
// Recursively fold the ConstantExpr's operands. If we have already folded
|
||||
// a ConstantExpr, we don't have to process it again.
|
||||
if (ConstantExpr *NewCE = dyn_cast<ConstantExpr>(NewC)) {
|
||||
if (FoldedOps.insert(NewCE))
|
||||
NewC = ConstantFoldConstantExpressionImpl(NewCE, TD, TLI, FoldedOps);
|
||||
}
|
||||
Ops.push_back(NewC);
|
||||
}
|
||||
|
||||
@ -902,6 +904,16 @@ Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
|
||||
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(), Ops, TD, TLI);
|
||||
}
|
||||
|
||||
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
|
||||
/// using the specified DataLayout. If successful, the constant result is
|
||||
/// result is returned, if not, null is returned.
|
||||
Constant *llvm::ConstantFoldConstantExpression(const ConstantExpr *CE,
|
||||
const DataLayout *TD,
|
||||
const TargetLibraryInfo *TLI) {
|
||||
SmallPtrSet<ConstantExpr *, 4> FoldedOps;
|
||||
return ConstantFoldConstantExpressionImpl(CE, TD, TLI, FoldedOps);
|
||||
}
|
||||
|
||||
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
|
||||
/// specified opcode and operands. If successful, the constant result is
|
||||
/// returned, if not, null is returned. Note that this function can fail when
|
||||
|
@ -237,18 +237,21 @@ void Constant::destroyConstantImpl() {
|
||||
delete this;
|
||||
}
|
||||
|
||||
/// canTrap - Return true if evaluation of this constant could trap. This is
|
||||
/// true for things like constant expressions that could divide by zero.
|
||||
bool Constant::canTrap() const {
|
||||
assert(getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
|
||||
static bool canTrapImpl(const Constant *C,
|
||||
SmallPtrSet<const ConstantExpr *, 4> &NonTrappingOps) {
|
||||
assert(C->getType()->isFirstClassType() && "Cannot evaluate aggregate vals!");
|
||||
// The only thing that could possibly trap are constant exprs.
|
||||
const ConstantExpr *CE = dyn_cast<ConstantExpr>(this);
|
||||
if (!CE) return false;
|
||||
const ConstantExpr *CE = dyn_cast<ConstantExpr>(C);
|
||||
if (!CE)
|
||||
return false;
|
||||
|
||||
// ConstantExpr traps if any operands can trap.
|
||||
for (unsigned i = 0, e = getNumOperands(); i != e; ++i)
|
||||
if (CE->getOperand(i)->canTrap())
|
||||
return true;
|
||||
for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) {
|
||||
if (ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(i))) {
|
||||
if (NonTrappingOps.insert(Op) && canTrapImpl(Op, NonTrappingOps))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// Otherwise, only specific operations can trap.
|
||||
switch (CE->getOpcode()) {
|
||||
@ -267,6 +270,13 @@ bool Constant::canTrap() const {
|
||||
}
|
||||
}
|
||||
|
||||
/// canTrap - Return true if evaluation of this constant could trap. This is
|
||||
/// true for things like constant expressions that could divide by zero.
|
||||
bool Constant::canTrap() const {
|
||||
SmallPtrSet<const ConstantExpr *, 4> NonTrappingOps;
|
||||
return canTrapImpl(this, NonTrappingOps);
|
||||
}
|
||||
|
||||
/// isThreadDependent - Return true if the value can vary between threads.
|
||||
bool Constant::isThreadDependent() const {
|
||||
SmallPtrSet<const Constant*, 64> Visited;
|
||||
|
@ -42,6 +42,7 @@ namespace {
|
||||
|
||||
private:
|
||||
SmallPtrSet<GlobalValue*, 32> AliveGlobals;
|
||||
SmallPtrSet<Constant *, 8> SeenConstants;
|
||||
|
||||
/// GlobalIsNeeded - mark the specific global value as needed, and
|
||||
/// recursively mark anything that it uses as also needed.
|
||||
@ -151,6 +152,7 @@ bool GlobalDCE::runOnModule(Module &M) {
|
||||
|
||||
// Make sure that all memory is released
|
||||
AliveGlobals.clear();
|
||||
SeenConstants.clear();
|
||||
|
||||
return Changed;
|
||||
}
|
||||
@ -190,12 +192,15 @@ void GlobalDCE::GlobalIsNeeded(GlobalValue *G) {
|
||||
void GlobalDCE::MarkUsedGlobalsAsNeeded(Constant *C) {
|
||||
if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
|
||||
return GlobalIsNeeded(GV);
|
||||
|
||||
|
||||
// Loop over all of the operands of the constant, adding any globals they
|
||||
// use to the list of needed globals.
|
||||
for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I)
|
||||
if (Constant *OpC = dyn_cast<Constant>(*I))
|
||||
MarkUsedGlobalsAsNeeded(OpC);
|
||||
for (User::op_iterator I = C->op_begin(), E = C->op_end(); I != E; ++I) {
|
||||
// If we've already processed this constant there's no need to do it again.
|
||||
Constant *Op = cast<Constant>(*I);
|
||||
if (SeenConstants.insert(Op))
|
||||
MarkUsedGlobalsAsNeeded(Op);
|
||||
}
|
||||
}
|
||||
|
||||
// RemoveUnusedGlobalValue - Loop over all of the uses of the specified
|
||||
|
97
test/Transforms/GlobalDCE/complex-constantexpr.ll
Normal file
97
test/Transforms/GlobalDCE/complex-constantexpr.ll
Normal file
@ -0,0 +1,97 @@
|
||||
; RUN: opt -O2 -disable-output < %s
|
||||
; PR15714
|
||||
|
||||
%struct.ham = type { i32 }
|
||||
|
||||
@global5 = common global i32 0, align 4
|
||||
@global6 = common global i32 0, align 4
|
||||
@global7 = common global i32 0, align 4
|
||||
@global = common global i32 0, align 4
|
||||
@global8 = common global %struct.ham zeroinitializer, align 4
|
||||
@global9 = common global i32 0, align 4
|
||||
@global10 = common global i32 0, align 4
|
||||
@global11 = common global i32 0, align 4
|
||||
|
||||
define void @zot12() {
|
||||
bb:
|
||||
store i32 0, i32* @global5, align 4
|
||||
store i32 0, i32* @global6, align 4
|
||||
br label %bb2
|
||||
|
||||
bb1: ; preds = %bb11
|
||||
%tmp = load i32* @global5, align 4
|
||||
br label %bb2
|
||||
|
||||
bb2: ; preds = %bb1, %bb
|
||||
%tmp3 = phi i32 [ %tmp, %bb1 ], [ 0, %bb ]
|
||||
%tmp4 = xor i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
|
||||
store i32 %tmp4, i32* @global5, align 4
|
||||
%tmp5 = icmp eq i32 %tmp3, zext (i1 icmp ne (i64 ptrtoint (i32* @global5 to i64), i64 1) to i32)
|
||||
br i1 %tmp5, label %bb8, label %bb6
|
||||
|
||||
bb6: ; preds = %bb2
|
||||
%tmp7 = tail call i32 @quux13()
|
||||
br label %bb8
|
||||
|
||||
bb8: ; preds = %bb6, %bb2
|
||||
%tmp9 = load i32* @global7, align 4
|
||||
%tmp10 = icmp eq i32 %tmp9, 0
|
||||
br i1 %tmp10, label %bb11, label %bb15
|
||||
|
||||
bb11: ; preds = %bb8
|
||||
%tmp12 = load i32* @global6, align 4
|
||||
%tmp13 = add nsw i32 %tmp12, 1
|
||||
store i32 %tmp13, i32* @global6, align 4
|
||||
%tmp14 = icmp slt i32 %tmp13, 42
|
||||
br i1 %tmp14, label %bb1, label %bb15
|
||||
|
||||
bb15: ; preds = %bb11, %bb8
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @quux13() {
|
||||
bb:
|
||||
store i32 1, i32* @global5, align 4
|
||||
ret i32 1
|
||||
}
|
||||
|
||||
define void @wombat() {
|
||||
bb:
|
||||
tail call void @zot12()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @wombat14() {
|
||||
bb:
|
||||
tail call void @blam()
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @blam() {
|
||||
bb:
|
||||
store i32 ptrtoint (i32* @global to i32), i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
|
||||
store i32 0, i32* @global9, align 4
|
||||
%tmp = load i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
|
||||
br label %bb1
|
||||
|
||||
bb1: ; preds = %bb1, %bb
|
||||
%tmp2 = phi i32 [ 0, %bb ], [ %tmp11, %bb1 ]
|
||||
%tmp3 = phi i32 [ %tmp, %bb ], [ %tmp10, %bb1 ]
|
||||
%tmp4 = icmp sgt i32 %tmp3, 0
|
||||
%tmp5 = zext i1 %tmp4 to i32
|
||||
%tmp6 = urem i32 %tmp5, 5
|
||||
%tmp7 = mul i32 %tmp3, -80
|
||||
%tmp8 = or i32 %tmp7, %tmp6
|
||||
%tmp9 = icmp eq i32 %tmp8, 0
|
||||
%tmp10 = zext i1 %tmp9 to i32
|
||||
%tmp11 = add nsw i32 %tmp2, 1
|
||||
%tmp12 = icmp eq i32 %tmp11, 20
|
||||
br i1 %tmp12, label %bb13, label %bb1
|
||||
|
||||
bb13: ; preds = %bb1
|
||||
store i32 %tmp10, i32* getelementptr inbounds (%struct.ham* @global8, i64 0, i32 0), align 4
|
||||
store i32 0, i32* @global10, align 4
|
||||
store i32 %tmp6, i32* @global11, align 4
|
||||
store i32 20, i32* @global9, align 4
|
||||
ret void
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user