Teach InstCombine to nuke a common alloca pattern -- an alloca which has

GEPs, bit casts, and stores reaching it but no other instructions. These
often show up during the iterative processing of the inliner, SROA, and
DCE. Once we hit this point, we can completely remove the alloca. These
were actually showing up in the final, fully optimized code in a bunch
of inliner tests I've been working on, and notably they show up after
LLVM finishes optimizing away all function calls involved in
hash_combine(a, b).

llvm-svn: 154285
This commit is contained in:
Chandler Carruth 2012-04-08 14:36:56 +00:00
parent 8957364ae5
commit b3fb4be360
2 changed files with 114 additions and 1 deletions

View File

@ -22,6 +22,72 @@ using namespace llvm;
STATISTIC(NumDeadStore, "Number of dead stores eliminated");
// Try to kill dead allocas by walking through its uses until we see some use
// that could escape. This is a conservative analysis which tries to handle
// GEPs, bitcasts, stores, and no-op intrinsics. These tend to be the things
// left after inlining and SROA finish chewing on an alloca.
static Instruction *removeDeadAlloca(InstCombiner &IC, AllocaInst &AI) {
SmallVector<Instruction *, 4> Worklist, DeadStores;
Worklist.push_back(&AI);
do {
Instruction *PI = Worklist.pop_back_val();
for (Value::use_iterator UI = PI->use_begin(), UE = PI->use_end();
UI != UE; ++UI) {
Instruction *I = cast<Instruction>(*UI);
switch (I->getOpcode()) {
default:
// Give up the moment we see something we can't handle.
return 0;
case Instruction::GetElementPtr:
case Instruction::BitCast:
Worklist.push_back(I);
continue;
case Instruction::Call:
// We can handle a limited subset of calls to no-op intrinsics.
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
case Intrinsic::dbg_declare:
case Intrinsic::dbg_value:
case Intrinsic::invariant_start:
case Intrinsic::invariant_end:
case Intrinsic::lifetime_start:
case Intrinsic::lifetime_end:
continue;
default:
return 0;
}
}
// Reject everything else.
return 0;
case Instruction::Store: {
// Stores into the alloca are only live if the alloca is live.
StoreInst *SI = cast<StoreInst>(I);
// We can eliminate atomic stores, but not volatile.
if (SI->isVolatile())
return 0;
// The store is only trivially safe if the poniter is the destination
// as opposed to the value. We're conservative here and don't check for
// the case where we store the address of a dead alloca into a dead
// alloca.
if (SI->getPointerOperand() != PI)
return 0;
DeadStores.push_back(I);
continue;
}
}
}
} while (!Worklist.empty());
// The alloca is dead. Kill off all the stores to it, and then replace it
// with undef.
while (!DeadStores.empty())
IC.EraseInstFromFunction(*DeadStores.pop_back_val());
return IC.ReplaceInstUsesWith(AI, UndefValue::get(AI.getType()));
}
Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
// Ensure that the alloca array size argument has type intptr_t, so that
// any casting is exposed early.
@ -81,7 +147,10 @@ Instruction *InstCombiner::visitAllocaInst(AllocaInst &AI) {
AI.setAlignment(TD->getPrefTypeAlignment(AI.getAllocatedType()));
}
return 0;
// Try to aggressively remove allocas which are only used for GEPs, lifetime
// markers, and stores. This happens when SROA iteratively promotes stores
// out of the alloca, and we need to cleanup after it.
return removeDeadAlloca(*this, AI);
}

View File

@ -44,3 +44,47 @@ define i32* @test4(i32 %n) {
%A = alloca i32, i32 %n
ret i32* %A
}
; Allocas which are only used by GEPs, bitcasts, and stores (transitively)
; should be deleted.
define void @test5() {
; CHECK: @test5
; CHECK-NOT: alloca
; CHECK-NOT: store
; CHECK: ret
entry:
%a = alloca { i32 }
%b = alloca i32*
%a.1 = getelementptr { i32 }* %a, i32 0, i32 0
store i32 123, i32* %a.1
store i32* %a.1, i32** %b
%b.1 = bitcast i32** %b to i32*
store i32 123, i32* %b.1
%a.2 = getelementptr { i32 }* %a, i32 0, i32 0
store atomic i32 2, i32* %a.2 unordered, align 4
%a.3 = getelementptr { i32 }* %a, i32 0, i32 0
store atomic i32 3, i32* %a.3 release, align 4
%a.4 = getelementptr { i32 }* %a, i32 0, i32 0
store atomic i32 4, i32* %a.4 seq_cst, align 4
ret void
}
declare void @f(i32* %p)
; Check that we don't delete allocas in some erroneous cases.
define void @test6() {
; CHECK: @test6
; CHECK-NOT: ret
; CHECK: alloca
; CHECK-NEXT: alloca
; CHECK: ret
entry:
%a = alloca { i32 }
%b = alloca i32
%a.1 = getelementptr { i32 }* %a, i32 0, i32 0
store volatile i32 123, i32* %a.1
tail call void @f(i32* %b)
ret void
}