Add support for cross block dse.

This patch enables dead stroe elimination across basicblocks.

Example:
define void @test_02(i32 %N) {
  %1 = alloca i32
  store i32 %N, i32* %1
  store i32 10, i32* @x
  %2 = load i32, i32* %1
  %3 = icmp ne i32 %2, 0
  br i1 %3, label %4, label %5

; <label>:4
  store i32 5, i32* @x
  br label %7

; <label>:5
  %6 = load i32, i32* @x
  store i32 %6, i32* @y
  br label %7

; <label>:7
  store i32 15, i32* @x
  ret void
}
In the above example dead store "store i32 5, i32* @x" is now eliminated.

Differential Revision: http://reviews.llvm.org/D11143

llvm-svn: 245025
This commit is contained in:
Karthik Bhat 2015-08-14 04:17:23 +00:00
parent df256e0563
commit b20d3957b6
3 changed files with 403 additions and 50 deletions

View File

@ -16,13 +16,16 @@
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
#include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h" #include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h" #include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h"
#include "llvm/Analysis/PostDominators.h"
#include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Constants.h" #include "llvm/IR/Constants.h"
@ -42,6 +45,7 @@ using namespace llvm;
STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); STATISTIC(NumRedundantStores, "Number of redundant stores deleted");
STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastStores, "Number of stores deleted");
STATISTIC(NumCrossBlockStores, "Number of cross block stores deleted");
STATISTIC(NumFastOther , "Number of other instrs removed"); STATISTIC(NumFastOther , "Number of other instrs removed");
namespace { namespace {
@ -49,12 +53,41 @@ namespace {
AliasAnalysis *AA; AliasAnalysis *AA;
MemoryDependenceAnalysis *MD; MemoryDependenceAnalysis *MD;
DominatorTree *DT; DominatorTree *DT;
PostDominatorTree *PDT;
const TargetLibraryInfo *TLI; const TargetLibraryInfo *TLI;
SmallVector<SmallVector<StoreInst *, 8>, 16> Candidates;
SetVector<StoreInst *> DeadStores;
SmallVector<std::pair<const BasicBlock *, const BasicBlock *>, 32>
BackEdges;
DenseSet<std::pair<const BasicBlock *, const BasicBlock *>> BackEdgesMap;
static char ID; // Pass identification, replacement for typeid static char ID; // Pass identification, replacement for typeid
DSE() : FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr) { DSE()
: FunctionPass(ID), AA(nullptr), MD(nullptr), DT(nullptr),
PDT(nullptr) {
initializeDSEPass(*PassRegistry::getPassRegistry()); initializeDSEPass(*PassRegistry::getPassRegistry());
} }
// Return all stores in a given BasicBlock.
SmallVector<StoreInst *, 8> getStores(BasicBlock *BB) {
SmallVector<StoreInst *, 8> VecStores;
for (auto &BI : *BB) {
if (StoreInst *SI = dyn_cast<StoreInst>(&BI))
VecStores.push_back(SI);
}
return VecStores;
}
// Get dfs in/out on the PDT and populate Candidates store list which
// is used to find potential dead stores for a given block
void populateCandidateStores(Function &F) {
for (auto &I : F) {
DomTreeNode *DTNode = PDT->getNode(&I);
if (!DTNode)
continue;
int DFSIn = DTNode->getDFSNumIn();
SmallVector<StoreInst *, 8> VecStores = getStores(&I);
Candidates[DFSIn] = VecStores;
}
}
bool runOnFunction(Function &F) override { bool runOnFunction(Function &F) override {
if (skipOptnoneFunction(F)) if (skipOptnoneFunction(F))
@ -64,7 +97,21 @@ namespace {
MD = &getAnalysis<MemoryDependenceAnalysis>(); MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(); TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();
PDT = &getAnalysis<PostDominatorTree>();
if (PDT->getRootNode()) {
int Count = PDT->getRootNode()->getDFSNumOut();
SmallVector<StoreInst *, 8> VecStores;
Candidates.resize(Count + 1);
Candidates.assign(Count + 1, VecStores);
// If we have more than 1 block try to populate candidate store.
if (Count > 1) {
populateCandidateStores(F);
FindFunctionBackedges(F, BackEdges);
for (auto I : BackEdges)
BackEdgesMap.insert(I);
}
}
bool Changed = false; bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
// Only check non-dead blocks. Dead blocks may have strange pointer // Only check non-dead blocks. Dead blocks may have strange pointer
@ -83,16 +130,23 @@ namespace {
void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, void RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
SmallSetVector<Value *, 16> &DeadStackObjects, SmallSetVector<Value *, 16> &DeadStackObjects,
const DataLayout &DL); const DataLayout &DL);
void handleNonLocalStoreDeletion(StoreInst *SI);
bool isSafeCandidateForDeletion(BasicBlock *SrcBlock, BasicBlock *SinkBlock,
StoreInst *SI);
void DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD,
const TargetLibraryInfo &TLI,
SmallSetVector<Value *, 16> *ValueSet = nullptr);
void getAnalysisUsage(AnalysisUsage &AU) const override { void getAnalysisUsage(AnalysisUsage &AU) const override {
AU.setPreservesCFG(); AU.setPreservesCFG();
AU.addRequired<DominatorTreeWrapperPass>(); AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<AliasAnalysis>(); AU.addRequired<AliasAnalysis>();
AU.addRequired<MemoryDependenceAnalysis>(); AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<PostDominatorTree>();
AU.addRequired<TargetLibraryInfoWrapperPass>(); AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addPreserved<AliasAnalysis>(); AU.addPreserved<AliasAnalysis>();
AU.addPreserved<DominatorTreeWrapperPass>(); AU.addPreserved<DominatorTreeWrapperPass>();
AU.addPreserved<MemoryDependenceAnalysis>(); AU.addPreserved<MemoryDependenceAnalysis>();
AU.addPreserved<PostDominatorTree>();
} }
}; };
} }
@ -102,6 +156,7 @@ INITIALIZE_PASS_BEGIN(DSE, "dse", "Dead Store Elimination", false, false)
INITIALIZE_AG_DEPENDENCY(AliasAnalysis) INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis) INITIALIZE_PASS_DEPENDENCY(MemoryDependenceAnalysis)
INITIALIZE_PASS_DEPENDENCY(PostDominatorTree)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false) INITIALIZE_PASS_END(DSE, "dse", "Dead Store Elimination", false, false)
@ -111,50 +166,6 @@ FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
// Helper functions // Helper functions
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
///
/// If ValueSet is non-null, remove any deleted instructions from it as well.
///
static void DeleteDeadInstruction(Instruction *I,
MemoryDependenceAnalysis &MD,
const TargetLibraryInfo &TLI,
SmallSetVector<Value*, 16> *ValueSet = nullptr) {
SmallVector<Instruction*, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
--NumFastOther;
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty()) continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
DeadInst->eraseFromParent();
if (ValueSet) ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
}
/// hasMemoryWrite - Does this instruction write some memory? This only returns /// hasMemoryWrite - Does this instruction write some memory? This only returns
/// true for things that we can analyze with other helpers below. /// true for things that we can analyze with other helpers below.
static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) { static bool hasMemoryWrite(Instruction *I, const TargetLibraryInfo &TLI) {
@ -527,10 +538,15 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
MemDepResult InstDep = MD->getDependency(Inst); MemDepResult InstDep = MD->getDependency(Inst);
// Ignore any store where we can't find a local dependence. if (!InstDep.isDef() && !InstDep.isClobber() && !InstDep.isNonLocal())
// FIXME: cross-block DSE would be fun. :)
if (!InstDep.isDef() && !InstDep.isClobber())
continue; continue;
if (InstDep.isNonLocal()) {
if (!PDT->getRootNode())
continue;
if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
handleNonLocalStoreDeletion(SI);
continue;
}
// Figure out what location is being stored to. // Figure out what location is being stored to.
MemoryLocation Loc = getLocForWrite(Inst, *AA); MemoryLocation Loc = getLocForWrite(Inst, *AA);
@ -704,6 +720,50 @@ static void FindUnconditionalPreds(SmallVectorImpl<BasicBlock *> &Blocks,
} }
} }
/// DeleteDeadInstruction - Delete this instruction. Before we do, go through
/// and zero out all the operands of this instruction. If any of them become
/// dead, delete them and the computation tree that feeds them.
/// If ValueSet is non-null, remove any deleted instructions from it as well.
void DSE::DeleteDeadInstruction(Instruction *I, MemoryDependenceAnalysis &MD,
const TargetLibraryInfo &TLI,
SmallSetVector<Value *, 16> *ValueSet) {
SmallVector<Instruction *, 32> NowDeadInsts;
NowDeadInsts.push_back(I);
--NumFastOther;
// Before we touch this instruction, remove it from memdep!
do {
Instruction *DeadInst = NowDeadInsts.pop_back_val();
++NumFastOther;
if (StoreInst *SI = dyn_cast<StoreInst>(DeadInst))
DeadStores.insert(SI);
// This instruction is dead, zap it, in stages. Start by removing it from
// MemDep, which needs to know the operands and needs it to be in the
// function.
MD.removeInstruction(DeadInst);
for (unsigned op = 0, e = DeadInst->getNumOperands(); op != e; ++op) {
Value *Op = DeadInst->getOperand(op);
DeadInst->setOperand(op, nullptr);
// If this operand just became dead, add it to the NowDeadInsts list.
if (!Op->use_empty())
continue;
if (Instruction *OpI = dyn_cast<Instruction>(Op))
if (isInstructionTriviallyDead(OpI, &TLI))
NowDeadInsts.push_back(OpI);
}
DeadInst->eraseFromParent();
if (ValueSet)
ValueSet->remove(DeadInst);
} while (!NowDeadInsts.empty());
}
/// HandleFree - Handle frees of entire structures whose dependency is a store /// HandleFree - Handle frees of entire structures whose dependency is a store
/// to a field of that structure. /// to a field of that structure.
bool DSE::HandleFree(CallInst *F) { bool DSE::HandleFree(CallInst *F) {
@ -931,3 +991,116 @@ void DSE::RemoveAccessedObjects(const MemoryLocation &LoadedLoc,
return !AA->isNoAlias(StackLoc, LoadedLoc); return !AA->isNoAlias(StackLoc, LoadedLoc);
}); });
} }
/// isSafeCandidateForDeletion- Check all paths from the SrcBlock till
/// SinkBlock to see if Store 'SI' is safe to be remove.
/// Returns true if the candidate store SI is safe to delete
/// else returns false.
bool DSE::isSafeCandidateForDeletion(BasicBlock *SrcBlock,
BasicBlock *SinkBlock, StoreInst *SI) {
SmallVector<BasicBlock *, 16> WorkList;
SmallPtrSet<BasicBlock *, 8> Visited;
BasicBlock::iterator BBI(SI);
// Check from the store till end of block and make sure we have no references
// to memory stored by this Store Instruction.
for (auto BI = ++BBI, BE = SrcBlock->end(); BI != BE; ++BI) {
Instruction *I = BI;
StoreInst *CSI = dyn_cast<StoreInst>(I);
if (CSI) {
AliasResult R =
AA->alias(MemoryLocation::get(SI), MemoryLocation::get(CSI));
if (R == MustAlias)
return true;
} else {
ModRefInfo Res = AA->getModRefInfo(I, MemoryLocation::get(SI));
if (Res != MRI_NoModRef)
return false;
}
}
// Add successors of the block to stack and start DFS.
for (succ_iterator I = succ_begin(SrcBlock), E = succ_end(SrcBlock); I != E;
++I) {
if (!Visited.insert(*I).second)
continue;
// A path with backedge may not be safe. Conservatively mark
// this store unsafe.
if (BackEdgesMap.count(std::make_pair(SrcBlock, *I)))
return false;
WorkList.push_back(*I);
}
while (!WorkList.empty()) {
BasicBlock *B = WorkList.pop_back_val();
auto BI = B->begin();
auto BE = B->end();
for (; BI != BE; ++BI) {
Instruction *I = BI;
StoreInst *CSI = dyn_cast<StoreInst>(I);
if (CSI) {
AliasResult R =
AA->alias(MemoryLocation::get(SI), MemoryLocation::get(CSI));
if (R == MustAlias)
break;
} else {
ModRefInfo Res = AA->getModRefInfo(I, MemoryLocation::get(SI));
if (Res != MRI_NoModRef)
return false;
}
}
// If we reached the sink node or we found a block which has a stores that
// overwrites the candidate block we need not look at their successors.
if (B == SinkBlock || BI != BE)
continue;
for (succ_iterator I = succ_begin(B), E = succ_end(B); I != E; ++I) {
if (!Visited.insert(*I).second)
continue;
// A path with backedge may not be safe.Conservatively mark
// this store unsafe.
if (BackEdgesMap.count(std::make_pair(B, *I)))
return false;
WorkList.push_back(*I);
}
}
return true;
}
/// handleNonLocalStoreDeletion - Handle non local dead store elimination.
/// This works by finding candidate stores using PDT and then running DFS
/// from candidate store block checking all paths to make sure the store is
/// safe to delete.
void DSE::handleNonLocalStoreDeletion(StoreInst *SI) {
BasicBlock *BB = SI->getParent();
Value *Pointer = SI->getPointerOperand();
DomTreeNode *DTNode = PDT->getNode(BB);
if (!DTNode)
return;
int DFSNumIn = DTNode->getDFSNumIn();
int DFSNumOut = DTNode->getDFSNumOut();
for (int i = DFSNumIn + 1; i < DFSNumOut; ++i) {
for (auto &I : Candidates[i]) {
StoreInst *CandidateSI = I;
if (DeadStores.count(CandidateSI))
continue;
Value *MemPtr = CandidateSI->getPointerOperand();
if (!MemPtr)
continue;
if (Pointer->getType() != MemPtr->getType())
continue;
AliasResult R =
AA->alias(MemoryLocation::get(SI), MemoryLocation::get(CandidateSI));
if (R != MustAlias)
continue;
if (isSafeCandidateForDeletion(CandidateSI->getParent(), BB,
CandidateSI)) {
DeleteDeadInstruction(CandidateSI, *MD, *TLI);
++NumCrossBlockStores;
}
}
}
}

View File

@ -0,0 +1,76 @@
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@x = common global i32 0
@y = common global i32 0
define void @test_01(i32 %N) {
%1 = alloca i32
store i32 %N, i32* %1
store i32 10, i32* @x
%2 = load i32, i32* %1
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
; <label>:4
store i32 5, i32* @x
br label %5
; <label>:5
store i32 15, i32* @x
ret void
}
; CHECK-LABEL: @test_01(
; CHECK-NOT: store i32 10, i32* @x
; CHECK-NOT: store i32 5, i32* @x
; CHECK: store i32 15, i32* @x
define void @test_02(i32 %N) {
%1 = alloca i32
store i32 %N, i32* %1
store i32 10, i32* @x
%2 = load i32, i32* %1
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %5
; <label>:4
store i32 5, i32* @x
br label %7
; <label>:5
%6 = load i32, i32* @x
store i32 %6, i32* @y
br label %7
; <label>:7
store i32 15, i32* @x
ret void
}
; CHECK-LABEL: @test_02(
; CHECK: store i32 10, i32* @x
; CHECK-NOT: store i32 5, i32* @x
; CHECK: store i32 %6, i32* @y
define void @test_03(i32 %N) #0 {
%1 = alloca i32
store i32 %N, i32* %1
store i32 10, i32* @x
%2 = load i32, i32* %1
%3 = icmp ne i32 %2, 0
br i1 %3, label %4, label %6
; <label>:4 ; preds = %0
%5 = load i32, i32* @x
store i32 %5, i32* @y
br label %6
; <label>:6 ; preds = %4, %0
store i32 15, i32* @x
ret void
}
; CHECK-LABEL: @test_03(
; CHECK: store i32 10, i32* @x
; CHECK: store i32 %5, i32* @y
; CHECK: store i32 15, i32* @x

View File

@ -0,0 +1,104 @@
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@A = common global [100 x i32] zeroinitializer, align 16
@x = common global i32 0
; Negative Test case-
;void foo(int N) {
; A[0] = N;
; for(int i=0;i<N;++i)
; A[i]+=i;
; A[0] = 10;
;}
;; Stores should not be optimized away.
define void @test_01(i32 %N) #0 {
%1 = alloca i32
%i = alloca i32
store i32 %N, i32* %1
%2 = load i32, i32* %1
store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
store i32 0, i32* %i
br label %3
; <label>:3 ; preds = %14, %0
%4 = load i32, i32* %i
%5 = load i32, i32* %1
%6 = icmp slt i32 %4, %5
br i1 %6, label %7, label %17
; <label>:7 ; preds = %3
%8 = load i32, i32* %i
%9 = load i32, i32* %i
%10 = sext i32 %9 to i64
%11 = getelementptr inbounds [100 x i32], [100 x i32]* @A, i32 0, i64 %10
%12 = load i32, i32* %11
%13 = add nsw i32 %12, %8
store i32 %13, i32* %11
br label %14
; <label>:14 ; preds = %7
%15 = load i32, i32* %i
%16 = add nsw i32 %15, 1
store i32 %16, i32* %i
br label %3
; <label>:17 ; preds = %3
store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
ret void
}
; CHECK-LABEL: @test_01(
; CHECK: store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
; CHECK: store i32 %13, i32* %11
; CHECK: store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
; Postive Test case-
;void foo(int N) {
; A[0] = N;
; for(int i=0;i<N;++i)
; A[i]=i;
; A[0] = 10;
;}
;; Stores should not be optimized away.
define void @test_02(i32 %N) #0 {
%1 = alloca i32
%i = alloca i32
store i32 %N, i32* %1
%2 = load i32, i32* %1
store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
store i32 0, i32* %i
br label %3
; <label>:3 ; preds = %12, %0
%4 = load i32, i32* %i
%5 = load i32, i32* %1
%6 = icmp slt i32 %4, %5
br i1 %6, label %7, label %15
; <label>:7 ; preds = %3
%8 = load i32, i32* %i
%9 = load i32, i32* %i
%10 = sext i32 %9 to i64
%11 = getelementptr inbounds [100 x i32], [100 x i32]* @A, i32 0, i64 %10
store i32 %8, i32* %11
br label %12
; <label>:12 ; preds = %7
%13 = load i32, i32* %i
%14 = add nsw i32 %13, 1
store i32 %14, i32* %i
br label %3
; <label>:15 ; preds = %3
store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
ret void
}
; CHECK-LABEL: @test_02(
; CHECK-NOT: store i32 %2, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)
; CHECK: store i32 %7, i32* %10
; CHECK: store i32 10, i32* getelementptr inbounds ([100 x i32], [100 x i32]* @A, i32 0, i64 0)