[EarlyCSE] Optionally use MemorySSA. NFC.

Summary:
Use MemorySSA, if requested, to do less conservative memory dependency
checking.

This change doesn't enable the MemorySSA enhanced EarlyCSE in the
default pipelines, so should be NFC.

Reviewers: dberlin, sanjoy, reames, majnemer

Subscribers: mcrosier, llvm-commits

Differential Revision: http://reviews.llvm.org/D19821

llvm-svn: 280279
This commit is contained in:
Geoff Berry 2016-08-31 19:24:10 +00:00
parent ed30533026
commit 4a45626e2f
22 changed files with 186 additions and 22 deletions

View File

@ -133,7 +133,7 @@ void LLVMAddVerifierPass(LLVMPassManagerRef PM);
void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM);
/** See llvm::createEarlyCSEPass function */
void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM);
void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM, int UseMemorySSA);
/** See llvm::createLowerExpectIntrinsicPass function */
void LLVMAddLowerExpectIntrinsicPass(LLVMPassManagerRef PM);

View File

@ -119,6 +119,7 @@ void initializeDominanceFrontierWrapperPassPass(PassRegistry&);
void initializeDominatorTreeWrapperPassPass(PassRegistry&);
void initializeDwarfEHPreparePass(PassRegistry&);
void initializeEarlyCSELegacyPassPass(PassRegistry &);
void initializeEarlyCSEMemSSALegacyPassPass(PassRegistry &);
void initializeEarlyIfConverterPass(PassRegistry&);
void initializeEdgeBundlesPass(PassRegistry&);
void initializeEfficiencySanitizerPass(PassRegistry&);

View File

@ -322,7 +322,7 @@ extern char &LCSSAID;
// EarlyCSE - This pass performs a simple and fast CSE pass over the dominator
// tree.
//
FunctionPass *createEarlyCSEPass();
FunctionPass *createEarlyCSEPass(bool UseMemorySSA = false);
//===----------------------------------------------------------------------===//
//

View File

@ -27,8 +27,12 @@ namespace llvm {
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
struct EarlyCSEPass : PassInfoMixin<EarlyCSEPass> {
EarlyCSEPass(bool UseMemorySSA = false) : UseMemorySSA(UseMemorySSA) {}
/// \brief Run the pass over the function.
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM);
bool UseMemorySSA;
};
}

View File

@ -139,7 +139,8 @@ FUNCTION_PASS("consthoist", ConstantHoistingPass())
FUNCTION_PASS("correlated-propagation", CorrelatedValuePropagationPass())
FUNCTION_PASS("dce", DCEPass())
FUNCTION_PASS("dse", DSEPass())
FUNCTION_PASS("early-cse", EarlyCSEPass())
FUNCTION_PASS("early-cse", EarlyCSEPass(/*UseMemorySSA=*/false))
FUNCTION_PASS("early-cse-memssa", EarlyCSEPass(/*UseMemorySSA=*/true))
FUNCTION_PASS("gvn-hoist", GVNHoistPass())
FUNCTION_PASS("instcombine", InstCombinePass())
FUNCTION_PASS("instsimplify", InstSimplifierPass())

View File

@ -32,6 +32,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemorySSA.h"
#include <deque>
using namespace llvm;
using namespace llvm::PatternMatch;
@ -251,6 +252,7 @@ public:
const TargetTransformInfo &TTI;
DominatorTree &DT;
AssumptionCache &AC;
MemorySSA *MSSA;
typedef RecyclingAllocator<
BumpPtrAllocator, ScopedHashTableVal<SimpleValue, Value *>> AllocatorTy;
typedef ScopedHashTable<SimpleValue, Value *, DenseMapInfo<SimpleValue>,
@ -312,8 +314,8 @@ public:
/// \brief Set up the EarlyCSE runner for a particular function.
EarlyCSE(const TargetLibraryInfo &TLI, const TargetTransformInfo &TTI,
DominatorTree &DT, AssumptionCache &AC)
: TLI(TLI), TTI(TTI), DT(DT), AC(AC), CurrentGeneration(0) {}
DominatorTree &DT, AssumptionCache &AC, MemorySSA *MSSA)
: TLI(TLI), TTI(TTI), DT(DT), AC(AC), MSSA(MSSA), CurrentGeneration(0) {}
bool run();
@ -487,9 +489,61 @@ private:
return TTI.getOrCreateResultFromMemIntrinsic(cast<IntrinsicInst>(Inst),
ExpectedType);
}
bool isSameMemGeneration(unsigned EarlierGeneration, unsigned LaterGeneration,
Instruction *EarlierInst, Instruction *LaterInst);
void removeMSSA(Instruction *Inst) {
if (!MSSA)
return;
// FIXME: Removing a store here can leave MemorySSA in an unoptimized state
// by creating MemoryPhis that have identical arguments and by creating
// MemoryUses whose defining access is not an actual clobber.
if (MemoryAccess *MA = MSSA->getMemoryAccess(Inst))
MSSA->removeMemoryAccess(MA);
}
};
}
/// Determine if the memory referenced by LaterInst is from the same heap version
/// as EarlierInst.
/// This is currently called in two scenarios:
///
/// load p
/// ...
/// load p
///
/// and
///
/// x = load p
/// ...
/// store x, p
///
/// in both cases we want to verify that there are no possible writes to the
/// memory referenced by p between the earlier and later instruction.
bool EarlyCSE::isSameMemGeneration(unsigned EarlierGeneration,
unsigned LaterGeneration,
Instruction *EarlierInst,
Instruction *LaterInst) {
// Check the simple memory generation tracking first.
if (EarlierGeneration == LaterGeneration)
return true;
if (!MSSA)
return false;
// Since we know LaterDef dominates LaterInst and EarlierInst dominates
// LaterInst, if LaterDef dominates EarlierInst then it can't occur between
// EarlierInst and LaterInst and neither can any other write that potentially
// clobbers LaterInst.
// FIXME: This is currently fairly expensive since it does an AA check even
// for MemoryUses that were already optimized by MemorySSA construction.
// Re-visit once MemorySSA optimized use tracking change has been committed.
MemoryAccess *LaterDef =
MSSA->getWalker()->getClobberingMemoryAccess(LaterInst);
return MSSA->dominates(LaterDef, MSSA->getMemoryAccess(EarlierInst));
}
bool EarlyCSE::processNode(DomTreeNode *Node) {
bool Changed = false;
BasicBlock *BB = Node->getBlock();
@ -547,6 +601,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// Dead instructions should just be removed.
if (isInstructionTriviallyDead(Inst, &TLI)) {
DEBUG(dbgs() << "EarlyCSE DCE: " << *Inst << '\n');
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumSimplify;
@ -601,6 +656,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
Changed = true;
}
if (isInstructionTriviallyDead(Inst, &TLI)) {
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
Killed = true;
@ -619,6 +675,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (auto *I = dyn_cast<Instruction>(V))
I->andIRFlags(Inst);
Inst->replaceAllUsesWith(V);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumCSE;
@ -649,19 +706,21 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// load.
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst != nullptr &&
(InVal.Generation == CurrentGeneration ||
InVal.IsInvariant || MemInst.isInvariantLoad()) &&
InVal.MatchingId == MemInst.getMatchingId() &&
// We don't yet handle removing loads with ordering of any kind.
!MemInst.isVolatile() && MemInst.isUnordered() &&
// We can't replace an atomic load with one which isn't also atomic.
InVal.IsAtomic >= MemInst.isAtomic()) {
InVal.IsAtomic >= MemInst.isAtomic() &&
(InVal.IsInvariant || MemInst.isInvariantLoad() ||
isSameMemGeneration(InVal.Generation, CurrentGeneration,
InVal.DefInst, Inst))) {
Value *Op = getOrCreateResult(InVal.DefInst, Inst->getType());
if (Op != nullptr) {
DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst
<< " to: " << *InVal.DefInst << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(Op);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumCSELoad;
@ -692,11 +751,14 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
// If we have an available version of this call, and if it is the right
// generation, replace this instruction.
std::pair<Instruction *, unsigned> InVal = AvailableCalls.lookup(Inst);
if (InVal.first != nullptr && InVal.second == CurrentGeneration) {
if (InVal.first != nullptr &&
isSameMemGeneration(InVal.second, CurrentGeneration, InVal.first,
Inst)) {
DEBUG(dbgs() << "EarlyCSE CSE CALL: " << *Inst
<< " to: " << *InVal.first << '\n');
if (!Inst->use_empty())
Inst->replaceAllUsesWith(InVal.first);
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumCSECall;
@ -729,15 +791,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand());
if (InVal.DefInst &&
InVal.DefInst == getOrCreateResult(Inst, InVal.DefInst->getType()) &&
InVal.Generation == CurrentGeneration &&
InVal.MatchingId == MemInst.getMatchingId() &&
// We don't yet handle removing stores with ordering of any kind.
!MemInst.isVolatile() && MemInst.isUnordered()) {
!MemInst.isVolatile() && MemInst.isUnordered() &&
isSameMemGeneration(InVal.Generation, CurrentGeneration,
InVal.DefInst, Inst)) {
// It is okay to have a LastStore to a different pointer here if MemorySSA
// tells us that the load and store are from the same memory generation.
// In that case, LastStore should keep its present value since we're
// removing the current store.
assert((!LastStore ||
ParseMemoryInst(LastStore, TTI).getPointerOperand() ==
MemInst.getPointerOperand()) &&
"can't have an intervening store!");
MemInst.getPointerOperand() ||
MSSA) &&
"can't have an intervening store if not using MemorySSA!");
DEBUG(dbgs() << "EarlyCSE DSE (writeback): " << *Inst << '\n');
removeMSSA(Inst);
Inst->eraseFromParent();
Changed = true;
++NumDSE;
@ -769,6 +838,7 @@ bool EarlyCSE::processNode(DomTreeNode *Node) {
if (LastStoreMemInst.isMatchingMemLoc(MemInst)) {
DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore
<< " due to: " << *Inst << '\n');
removeMSSA(LastStore);
LastStore->eraseFromParent();
Changed = true;
++NumDSE;
@ -865,8 +935,10 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
auto &TTI = AM.getResult<TargetIRAnalysis>(F);
auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
auto &AC = AM.getResult<AssumptionAnalysis>(F);
auto *MSSA =
UseMemorySSA ? &AM.getResult<MemorySSAAnalysis>(F).getMSSA() : nullptr;
EarlyCSE CSE(TLI, TTI, DT, AC);
EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
if (!CSE.run())
return PreservedAnalyses::all();
@ -876,6 +948,8 @@ PreservedAnalyses EarlyCSEPass::run(Function &F,
PreservedAnalyses PA;
PA.preserve<DominatorTreeAnalysis>();
PA.preserve<GlobalsAA>();
if (UseMemorySSA)
PA.preserve<MemorySSAAnalysis>();
return PA;
}
@ -887,12 +961,16 @@ namespace {
/// canonicalize things as it goes. It is intended to be fast and catch obvious
/// cases so that instcombine and other passes are more effective. It is
/// expected that a later pass of GVN will catch the interesting/hard cases.
class EarlyCSELegacyPass : public FunctionPass {
template<bool UseMemorySSA>
class EarlyCSELegacyCommonPass : public FunctionPass {
public:
static char ID;
EarlyCSELegacyPass() : FunctionPass(ID) {
initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
EarlyCSELegacyCommonPass() : FunctionPass(ID) {
if (UseMemorySSA)
initializeEarlyCSEMemSSALegacyPassPass(*PassRegistry::getPassRegistry());
else
initializeEarlyCSELegacyPassPass(*PassRegistry::getPassRegistry());
}
bool runOnFunction(Function &F) override {
@ -903,8 +981,10 @@ public:
auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
auto *MSSA =
UseMemorySSA ? &getAnalysis<MemorySSAWrapperPass>().getMSSA() : nullptr;
EarlyCSE CSE(TLI, TTI, DT, AC);
EarlyCSE CSE(TLI, TTI, DT, AC, MSSA);
return CSE.run();
}
@ -914,15 +994,20 @@ public:
AU.addRequired<DominatorTreeWrapperPass>();
AU.addRequired<TargetLibraryInfoWrapperPass>();
AU.addRequired<TargetTransformInfoWrapperPass>();
if (UseMemorySSA) {
AU.addRequired<MemorySSAWrapperPass>();
AU.addPreserved<MemorySSAWrapperPass>();
}
AU.addPreserved<GlobalsAAWrapperPass>();
AU.setPreservesCFG();
}
};
}
char EarlyCSELegacyPass::ID = 0;
using EarlyCSELegacyPass = EarlyCSELegacyCommonPass</*UseMemorySSA=*/false>;
FunctionPass *llvm::createEarlyCSEPass() { return new EarlyCSELegacyPass(); }
template<>
char EarlyCSELegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(EarlyCSELegacyPass, "early-cse", "Early CSE", false,
false)
@ -931,3 +1016,26 @@ INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(EarlyCSELegacyPass, "early-cse", "Early CSE", false, false)
using EarlyCSEMemSSALegacyPass =
EarlyCSELegacyCommonPass</*UseMemorySSA=*/true>;
template<>
char EarlyCSEMemSSALegacyPass::ID = 0;
FunctionPass *llvm::createEarlyCSEPass(bool UseMemorySSA) {
if (UseMemorySSA)
return new EarlyCSEMemSSALegacyPass();
else
return new EarlyCSELegacyPass();
}
INITIALIZE_PASS_BEGIN(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
"Early CSE w/ MemorySSA", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MemorySSAWrapperPass)
INITIALIZE_PASS_END(EarlyCSEMemSSALegacyPass, "early-cse-memssa",
"Early CSE w/ MemorySSA", false, false)

View File

@ -44,6 +44,7 @@ void llvm::initializeScalarOpts(PassRegistry &Registry) {
initializeGuardWideningLegacyPassPass(Registry);
initializeGVNLegacyPassPass(Registry);
initializeEarlyCSELegacyPassPass(Registry);
initializeEarlyCSEMemSSALegacyPassPass(Registry);
initializeGVNHoistLegacyPassPass(Registry);
initializeFlattenCFGPassPass(Registry);
initializeInductiveRangeCheckEliminationPass(Registry);
@ -233,8 +234,8 @@ void LLVMAddCorrelatedValuePropagationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCorrelatedValuePropagationPass());
}
void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createEarlyCSEPass());
void LLVMAddEarlyCSEPass(LLVMPassManagerRef PM, int UseMemorySSA) {
unwrap(PM)->add(createEarlyCSEPass(UseMemorySSA));
}
void LLVMAddGVNHoistLegacyPass(LLVMPassManagerRef PM) {

View File

@ -1,5 +1,7 @@
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -early-cse | FileCheck %s
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -basicaa -early-cse-memssa | FileCheck %s
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -passes=early-cse | FileCheck %s
; RUN: opt < %s -S -mtriple=aarch64-none-linux-gnu -mattr=+neon -aa-pipeline=basic-aa -passes=early-cse-memssa | FileCheck %s
define <4 x i32> @test_cse(i32* %a, [2 x <4 x i32>] %s.coerce, i32 %n) {
entry:

View File

@ -1,4 +1,5 @@
; RUN: opt -S -early-cse < %s | FileCheck %s
; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
; CHECK-LABEL: @test12(
define i32 @test12(i1 %B, i32* %P1, i32* %P2) {

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
; RUN: opt < %s -S -passes=early-cse | FileCheck %s
declare void @llvm.assume(i1) nounwind

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
; CHECK-LABEL: @test1(
define void @test1(float %A, float %B, float* %PA, float* %PB) {

View File

@ -1,4 +1,5 @@
; RUN: opt -early-cse -S < %s | FileCheck %s
; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
; Can we CSE a known condition to a constant?
define i1 @test(i8* %p) {

View File

@ -1,4 +1,5 @@
; RUN: opt -early-cse -S < %s | FileCheck %s
; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
; Same as GVN/edge.ll, but updated to reflect EarlyCSE's less powerful
; implementation. EarlyCSE currently doesn't exploit equality comparisons
; against constants.

View File

@ -1,4 +1,5 @@
; RUN: opt -S -early-cse < %s | FileCheck %s
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
; NOTE: This file is testing the current implementation. Some of
; the transforms used as negative tests below would be legal, but
; only if reached through a chain of logic which EarlyCSE is incapable

View File

@ -1,4 +1,5 @@
; RUN: opt -early-cse -S < %s | FileCheck %s
; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
declare void @use(i1)

View File

@ -1,4 +1,5 @@
; RUN: opt < %s -S -early-cse | FileCheck %s
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
; Ensure we don't simplify away additions vectors of +0.0's (same as scalars).
define <4 x float> @fV( <4 x float> %a) {

View File

@ -1,4 +1,5 @@
; RUN: opt -S -early-cse < %s | FileCheck %s
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
declare void @llvm.experimental.guard(i1,...)

View File

@ -1,4 +1,5 @@
; RUN: opt -early-cse -S < %s | FileCheck %s
; RUN: opt -basicaa -early-cse-memssa -S < %s | FileCheck %s
; PR12231
declare i32 @f()

View File

@ -1,4 +1,5 @@
; RUN: opt -S -early-cse < %s | FileCheck %s
; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s
declare void @clobber_and_use(i32)

View File

@ -0,0 +1,34 @@
; RUN: opt < %s -S -early-cse | FileCheck %s --check-prefix=CHECK-NOMEMSSA
; RUN: opt < %s -S -basicaa -early-cse-memssa | FileCheck %s
; RUN: opt < %s -S -passes='early-cse' | FileCheck %s --check-prefix=CHECK-NOMEMSSA
; RUN: opt < %s -S -aa-pipeline=basic-aa -passes='early-cse-memssa' | FileCheck %s
@G1 = global i32 zeroinitializer
@G2 = global i32 zeroinitializer
;; Simple load value numbering across non-clobbering store.
; CHECK-LABEL: @test1(
; CHECK-NOMEMSSA-LABEL: @test1(
define i32 @test1() {
%V1 = load i32, i32* @G1
store i32 0, i32* @G2
%V2 = load i32, i32* @G1
; CHECK-NOMEMSSA: sub i32 %V1, %V2
%Diff = sub i32 %V1, %V2
ret i32 %Diff
; CHECK: ret i32 0
}
;; Simple dead store elimination across non-clobbering store.
; CHECK-LABEL: @test2(
; CHECK-NOMEMSSA-LABEL: @test2(
define void @test2() {
entry:
%V1 = load i32, i32* @G1
; CHECK: store i32 0, i32* @G2
store i32 0, i32* @G2
; CHECK-NOT: store
; CHECK-NOMEMSSA: store i32 %V1, i32* @G1
store i32 %V1, i32* @G1
ret void
}

View File

@ -1,4 +1,5 @@
; RUN: opt -S -early-cse < %s | FileCheck %s
; RUN: opt -S -basicaa -early-cse-memssa < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"