[MemorySSA] Make insertDef insert corresponding phi nodes.

Summary:
The original assumption behind the insertDef method was that it would not
materialize Defs out of nowhere, and hence would never need to insert the
phis required after adding a Def.

However, when cloning an instruction (the use case exercised by LICM), we do
materialize Defs "out of nowhere". If the block receiving the Def already has
at least one other Def, no further processing is needed. If the block just
received its first Def, we must determine where Phi placement is needed.
The only new user of insertDef is LICM, which is why it triggered the bug.
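
For illustration, here is a minimal caller-side sketch of the pattern that
exposes this, roughly what a cloning pass would do through the updater's
public API. The helper name and variables are hypothetical and not taken from
this patch.

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Instructions.h"

using namespace llvm;

// Hypothetical helper: ClonedStore was just cloned into TargetBB, a block
// that may not have held any MemoryDef before.
static void registerClonedStore(MemorySSA &MSSA, StoreInst *ClonedStore,
                                BasicBlock *TargetBB) {
  MemorySSAUpdater Updater(&MSSA);
  // Create a MemoryDef for the cloned store; insertDef fills in the defining
  // access, so nullptr is fine here.
  MemoryAccess *NewAccess = Updater.createMemoryAccessInBB(
      ClonedStore, /*Definition=*/nullptr, TargetBB, MemorySSA::Beginning);
  // If TargetBB just received its first Def, insertDef now computes the IDF
  // of TargetBB and inserts any MemoryPhis the new Def makes necessary.
  Updater.insertDef(cast<MemoryDef>(NewAccess), /*RenameUses=*/true);
}

The actual LICM update differs in its details; the point is only that the new
Def can land in a block with no prior Def, which is the case insertDef now
handles by computing that block's IDF.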

The original assumption also fails to hold for the move() method. If we move
a Def from the entry block of a diamond into either the left or the right
block, then the merge block must add a phi. While this use case does not
currently occur, or may be viewed as an incorrect transformation, MSSA must
behave correctly given the scenario.
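
As an illustration of that scenario, a hypothetical sketch (not a test in this
patch) of how the move would look through the updater's public API, assuming
the moved Def is re-inserted via insertDef as described above:

#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/IR/Instructions.h"
#include <cassert>

using namespace llvm;

// Hypothetical helper: SI's MemoryDef currently sits in the entry block of a
// diamond; Left is one arm and Merge is the join block.
static void moveStoreIntoBranch(MemorySSA &MSSA, StoreInst *SI,
                                BasicBlock *Left, BasicBlock *Merge) {
  MemorySSAUpdater Updater(&MSSA);
  // Move the IR instruction into the left arm, then move its MemoryDef.
  SI->moveBefore(Left->getTerminator());
  Updater.moveToPlace(MSSA.getMemoryAccess(SI), Left, MemorySSA::End);
  // With this change, the Def insertion triggered by the move places the
  // MemoryPhi that Merge now needs.
  assert(MSSA.getMemoryAccess(Merge) && "expected a MemoryPhi at the merge");
}

The modified CreateALoadUpdater unit test below exercises the same invariant
directly through insertDef.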

Resolves PR40749 and PR40754.

Reviewers: george.burgess.iv

Subscribers: sanjoy, jlebar, Prazek, jdoerfert, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D58652

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@355040 91177308-0d34-0410-b5e6-96231b3b80d8
Alina Sbirlea 2019-02-27 22:20:22 +00:00
parent 83855d225b
commit a1d5c6d1e8
5 changed files with 177 additions and 9 deletions


@@ -269,6 +269,8 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
      // Also make sure we skip ourselves to avoid self references.
      if (isa<MemoryUse>(U.getUser()) || U.getUser() == MD)
        continue;
      // Defs are automatically unoptimized when the user is set to MD below,
      // because the isOptimized() call will fail to find the same ID.
      U.set(MD);
    }
  }
@@ -276,6 +278,9 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
  // and that def is now our defining access.
  MD->setDefiningAccess(DefBefore);
  // Remember the index where we may insert new phis below.
  unsigned NewPhiIndex = InsertedPHIs.size();
  SmallVector<WeakVH, 8> FixupList(InsertedPHIs.begin(), InsertedPHIs.end());
  if (!DefBeforeSameBlock) {
    // If there was a local def before us, we must have the same effect it
@@ -289,9 +294,49 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
    // backwards to find the def. To make that work, we'd have to track whether
    // getDefRecursive only ever used the single predecessor case. These types
    // of paths also only exist in between CFG simplifications.
    // If this is the first def in the block and this insert is in an arbitrary
    // place, compute IDF and place phis.
    auto Iter = MD->getDefsIterator();
    ++Iter;
    auto IterEnd = MSSA->getBlockDefs(MD->getBlock())->end();
    if (Iter == IterEnd) {
      ForwardIDFCalculator IDFs(*MSSA->DT);
      SmallVector<BasicBlock *, 32> IDFBlocks;
      SmallPtrSet<BasicBlock *, 2> DefiningBlocks;
      DefiningBlocks.insert(MD->getBlock());
      IDFs.setDefiningBlocks(DefiningBlocks);
      IDFs.calculate(IDFBlocks);
      SmallVector<AssertingVH<MemoryPhi>, 4> NewInsertedPHIs;
      for (auto *BBIDF : IDFBlocks)
        if (!MSSA->getMemoryAccess(BBIDF))
          NewInsertedPHIs.push_back(MSSA->createMemoryPhi(BBIDF));
      for (auto &MPhi : NewInsertedPHIs) {
        auto *BBIDF = MPhi->getBlock();
        for (auto *Pred : predecessors(BBIDF)) {
          DenseMap<BasicBlock *, TrackingVH<MemoryAccess>> CachedPreviousDef;
          MPhi->addIncoming(getPreviousDefFromEnd(Pred, CachedPreviousDef),
                            Pred);
        }
      }
      // Re-take the index where we're adding the new phis, because the above
      // call to getPreviousDefFromEnd, may have inserted into InsertedPHIs.
      NewPhiIndex = InsertedPHIs.size();
      for (auto &MPhi : NewInsertedPHIs) {
        InsertedPHIs.push_back(&*MPhi);
        FixupList.push_back(&*MPhi);
      }
    }
    FixupList.push_back(MD);
  }
  // Remember the index where we stopped inserting new phis above, since the
  // fixupDefs call in the loop below may insert more, that are already minimal.
  unsigned NewPhiIndexEnd = InsertedPHIs.size();
  while (!FixupList.empty()) {
    unsigned StartingPHISize = InsertedPHIs.size();
    fixupDefs(FixupList);
@@ -299,6 +344,15 @@ void MemorySSAUpdater::insertDef(MemoryDef *MD, bool RenameUses) {
    // Put any new phis on the fixup list, and process them
    FixupList.append(InsertedPHIs.begin() + StartingPHISize, InsertedPHIs.end());
  }
  // Optimize potentially non-minimal phis added in this method.
  for (unsigned Idx = NewPhiIndex; Idx < NewPhiIndexEnd; ++Idx) {
    if (auto *MPhi = cast_or_null<MemoryPhi>(InsertedPHIs[Idx])) {
      auto OperRange = MPhi->operands();
      tryRemoveTrivialPhi(MPhi, OperRange);
    }
  }
  // Now that all fixups are done, rename all uses if we are asked.
  if (RenameUses) {
    SmallPtrSet<BasicBlock *, 16> Visited;


@@ -2068,6 +2068,8 @@ bool llvm::promoteLoopAccessesToScalars(
  // stores in the loop.
  Promoter.run(LoopUses);
  if (MSSAU && VerifyMemorySSA)
    MSSAU->getMemorySSA()->verifyMemorySSA();
  // If the SSAUpdater didn't use the load in the preheader, just zap it now.
  if (PreheaderLoad->use_empty())
    eraseInstruction(*PreheaderLoad, *SafetyInfo, CurAST, MSSAU);


@@ -0,0 +1,58 @@
; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S < %s | FileCheck %s
; REQUIRES: asserts
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "systemz-unknown"
@g_3 = external dso_local local_unnamed_addr global i32, align 4
@g_57 = external dso_local local_unnamed_addr global i8, align 2
@g_82 = external dso_local global [8 x i16], align 2
@g_107 = external dso_local local_unnamed_addr global i32, align 4
define internal fastcc void @foo1() unnamed_addr {
; CHECK-LABEL: @foo1()
entry:
%.pre.pre = load i32, i32* @g_3, align 4
br label %loop1
loop1:
%tmp0 = phi i32 [ undef, %entry ], [ %var18.lcssa, %loopexit ]
br label %preheader
preheader:
%indvars.iv = phi i64 [ 0, %loop1 ], [ %indvars.iv.next, %loop6 ]
%phi18 = phi i32 [ %tmp0, %loop1 ], [ 0, %loop6 ]
%phi87 = phi i32 [ 0, %loop1 ], [ %tmp7, %loop6 ]
%tmp1 = getelementptr inbounds [8 x i16], [8 x i16]* @g_82, i64 0, i64 %indvars.iv
%tmp2 = load i16, i16* %tmp1, align 2
%tmp3 = trunc i16 %tmp2 to i8
store i8 %tmp3, i8* @g_57, align 2
store i32 8, i32* @g_107, align 4
%tmp4 = icmp eq i32 %.pre.pre, 0
%spec.select = select i1 %tmp4, i32 %phi18, i32 14
%tmp5 = trunc i64 %indvars.iv to i32
switch i32 %spec.select, label %loopexit [
i32 0, label %loop6
i32 14, label %loop9
]
loop6:
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
%tmp7 = add nuw nsw i32 %phi87, 1
%tmp8 = icmp ult i64 %indvars.iv.next, 6
br i1 %tmp8, label %preheader, label %loop9
loop9:
%phi8.lcssa = phi i32 [ %tmp5, %preheader ], [ %tmp7, %loop6 ]
%tmp10 = trunc i32 %phi8.lcssa to i8
%tmp11 = tail call i16* @func_101(i16* getelementptr inbounds ([8 x i16], [8 x i16]* @g_82, i64 0, i64 6), i16* undef, i8 zeroext %tmp10)
unreachable
loopexit:
%var18.lcssa = phi i32 [ %phi18, %preheader ]
br label %loop1
}
declare dso_local i16* @func_101(i16*, i16*, i8) local_unnamed_addr


@@ -0,0 +1,54 @@
; RUN: opt -licm -enable-mssa-loop-dependency -verify-memoryssa -S < %s | FileCheck %s
; REQUIRES: asserts
target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
target triple = "systemz-unknown"
@g_120 = external dso_local local_unnamed_addr global [8 x [4 x [6 x i32]]], align 4
@g_185 = external dso_local local_unnamed_addr global i32, align 4
@g_329 = external dso_local local_unnamed_addr global i16, align 2
; Function Attrs: norecurse noreturn nounwind
define dso_local void @func_65() local_unnamed_addr {
; CHECK-LABEL: @func_65()
br label %1
; <label>:1: ; preds = %.thread, %0
br label %2
; <label>:2: ; preds = %.critedge, %1
br label %3
; <label>:3: ; preds = %5, %2
%storemerge = phi i32 [ 0, %2 ], [ %6, %5 ]
store i32 %storemerge, i32* @g_185, align 4
%4 = icmp ult i32 %storemerge, 2
br i1 %4, label %5, label %.thread.loopexit
; <label>:5: ; preds = %3
%6 = add i32 %storemerge, 1
%7 = zext i32 %6 to i64
%8 = getelementptr [8 x [4 x [6 x i32]]], [8 x [4 x [6 x i32]]]* @g_120, i64 0, i64 undef, i64 %7, i64 undef
%9 = load i32, i32* %8, align 4
%10 = icmp eq i32 %9, 0
br i1 %10, label %3, label %11
; <label>:11: ; preds = %5
%storemerge.lcssa4 = phi i32 [ %storemerge, %5 ]
%12 = icmp eq i32 %storemerge.lcssa4, 0
br i1 %12, label %.critedge, label %.thread.loopexit3
.critedge: ; preds = %11
store i16 0, i16* @g_329, align 2
br label %2
.thread.loopexit: ; preds = %3
br label %.thread
.thread.loopexit3: ; preds = %11
br label %.thread
.thread: ; preds = %.thread.loopexit3, %.thread.loopexit
br label %1
}


@@ -159,15 +159,15 @@ TEST_F(MemorySSATest, CreateLoadsAndStoreUpdater) {
  MemoryAccess *LeftStoreAccess = Updater.createMemoryAccessInBB(
      LeftStore, nullptr, Left, MemorySSA::Beginning);
  Updater.insertDef(cast<MemoryDef>(LeftStoreAccess), false);
  // We don't touch existing loads, so we need to create a new one to get a phi
  // MemoryPHI should exist after adding LeftStore.
  MP = MSSA.getMemoryAccess(Merge);
  EXPECT_NE(MP, nullptr);
  // Add the second load
  B.SetInsertPoint(Merge, Merge->begin());
  LoadInst *SecondLoad = B.CreateLoad(B.getInt8Ty(), PointerArg);
  // MemoryPHI should not already exist.
  MP = MSSA.getMemoryAccess(Merge);
  EXPECT_EQ(MP, nullptr);
  // Create the load memory access
  MemoryUse *SecondLoadAccess = cast<MemoryUse>(Updater.createMemoryAccessInBB(
      SecondLoad, nullptr, Merge, MemorySSA::Beginning));
@@ -226,14 +226,14 @@ TEST_F(MemorySSATest, CreateALoadUpdater) {
      Updater.createMemoryAccessInBB(SI, nullptr, Left, MemorySSA::Beginning);
  Updater.insertDef(cast<MemoryDef>(StoreAccess));
  // MemoryPHI should be created when inserting the def
  MemoryPhi *MP = MSSA.getMemoryAccess(Merge);
  EXPECT_NE(MP, nullptr);
  // Add the load
  B.SetInsertPoint(Merge, Merge->begin());
  LoadInst *LoadInst = B.CreateLoad(B.getInt8Ty(), PointerArg);
  // MemoryPHI should not already exist.
  MemoryPhi *MP = MSSA.getMemoryAccess(Merge);
  EXPECT_EQ(MP, nullptr);
  // Create the load memory access
  MemoryUse *LoadAccess = cast<MemoryUse>(Updater.createMemoryAccessInBB(
      LoadInst, nullptr, Merge, MemorySSA::Beginning));