mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-24 19:44:49 +00:00
[SROA] Fix another instability in SROA with respect to the slice
ordering. The fundamental problem that we're hitting here is that the use-def chain ordering is *itself* not a stable thing to be relying on in the rewriting for SROA. Further, we use a non-stable sort over the slices to arrange them based on the section of the alloca they're operating on. With a debugging STL implementation (or different implementations in stage2 and stage3) this can cause stage2 != stage3. The specific aspect of this problem fixed in this commit deals with the rewriting and load-speculation around PHIs and Selects. This, like many other aspects of the use-rewriting in SROA, is really part of the "strong SSA-formation" that is doen by SROA where it works very hard to canonicalize loads and stores in *just* the right way to satisfy the needs of mem2reg[1]. When we have a select (or a PHI) with 2 uses of the same alloca, we test that loads downstream of the select are speculatable around it twice. If only one of the operands to the select needs to be rewritten, then if we get lucky we rewrite that one first and the select is immediately speculatable. This can cause the order of operand visitation, and thus the order of slices to be rewritten, to change an alloca from promotable to non-promotable and vice versa. The fix is to defer all of the speculation until *after* the rewrite phase is done. Once we've rewritten everything, we can accurately test for whether speculation will work (once, instead of twice!) and the order ceases to matter. This also happens to simplify the other subtlety of speculation -- we need to *not* speculate anything unless the result of speculating will make the alloca fully promotable by mem2reg. I had a previous attempt at simplifying this, but it was still pretty horrible. There is actually already a *really* nice test case for this in basictest.ll, but on multiple STL implementations and inputs, we just got "lucky". Fortunately, the test case is very small and we can essentially build it in exactly the opposite way to get reasonable coverage in both directions even from normal STL implementations. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@202092 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
2a213a4532
commit
c537759a7f
@ -1954,9 +1954,9 @@ class AllocaSliceRewriter : public InstVisitor<AllocaSliceRewriter, bool> {
|
||||
Use *OldUse;
|
||||
Instruction *OldPtr;
|
||||
|
||||
// Output members carrying state about the result of visiting and rewriting
|
||||
// the slice of the alloca.
|
||||
bool IsUsedByRewrittenSpeculatableInstructions;
|
||||
// Track post-rewrite users which are PHI nodes and Selects.
|
||||
SmallPtrSetImpl<PHINode *> &PHIUsers;
|
||||
SmallPtrSetImpl<SelectInst *> &SelectUsers;
|
||||
|
||||
// Utility IR builder, whose name prefix is setup for each visited use, and
|
||||
// the insertion point is set to point to the user.
|
||||
@ -1966,8 +1966,9 @@ public:
|
||||
AllocaSliceRewriter(const DataLayout &DL, AllocaSlices &S, SROA &Pass,
|
||||
AllocaInst &OldAI, AllocaInst &NewAI,
|
||||
uint64_t NewBeginOffset, uint64_t NewEndOffset,
|
||||
bool IsVectorPromotable = false,
|
||||
bool IsIntegerPromotable = false)
|
||||
bool IsVectorPromotable, bool IsIntegerPromotable,
|
||||
SmallPtrSetImpl<PHINode *> &PHIUsers,
|
||||
SmallPtrSetImpl<SelectInst *> &SelectUsers)
|
||||
: DL(DL), S(S), Pass(Pass), OldAI(OldAI), NewAI(NewAI),
|
||||
NewAllocaBeginOffset(NewBeginOffset), NewAllocaEndOffset(NewEndOffset),
|
||||
NewAllocaTy(NewAI.getAllocatedType()),
|
||||
@ -1980,7 +1981,7 @@ public:
|
||||
DL.getTypeSizeInBits(NewAI.getAllocatedType()))
|
||||
: 0),
|
||||
BeginOffset(), EndOffset(), IsSplittable(), IsSplit(), OldUse(),
|
||||
OldPtr(), IsUsedByRewrittenSpeculatableInstructions(false),
|
||||
OldPtr(), PHIUsers(PHIUsers), SelectUsers(SelectUsers),
|
||||
IRB(NewAI.getContext(), ConstantFolder()) {
|
||||
if (VecTy) {
|
||||
assert((DL.getTypeSizeInBits(ElementTy) % 8) == 0 &&
|
||||
@ -2013,20 +2014,6 @@ public:
|
||||
return CanSROA;
|
||||
}
|
||||
|
||||
/// \brief Query whether this slice is used by speculatable instructions after
|
||||
/// rewriting.
|
||||
///
|
||||
/// These instructions (PHIs and Selects currently) require the alloca slice
|
||||
/// to run back through the rewriter. Thus, they are promotable, but not on
|
||||
/// this iteration. This is distinct from a slice which is unpromotable for
|
||||
/// some other reason, in which case we don't even want to perform the
|
||||
/// speculation. This can be querried at any time and reflects whether (at
|
||||
/// that point) a visit call has rewritten a speculatable instruction on the
|
||||
/// current slice.
|
||||
bool isUsedByRewrittenSpeculatableInstructions() const {
|
||||
return IsUsedByRewrittenSpeculatableInstructions;
|
||||
}
|
||||
|
||||
private:
|
||||
// Make sure the other visit overloads are visible.
|
||||
using Base::visit;
|
||||
@ -2658,16 +2645,11 @@ private:
|
||||
DEBUG(dbgs() << " to: " << PN << "\n");
|
||||
deleteIfTriviallyDead(OldPtr);
|
||||
|
||||
// Check whether we can speculate this PHI node, and if so remember that
|
||||
// fact and queue it up for another iteration after the speculation
|
||||
// occurs.
|
||||
if (isSafePHIToSpeculate(PN, &DL)) {
|
||||
Pass.SpeculatablePHIs.insert(&PN);
|
||||
IsUsedByRewrittenSpeculatableInstructions = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false; // PHIs can't be promoted on their own.
|
||||
// PHIs can't be promoted on their own, but often can be speculated. We
|
||||
// check the speculation outside of the rewriter so that we see the
|
||||
// fully-rewritten alloca.
|
||||
PHIUsers.insert(&PN);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool visitSelectInst(SelectInst &SI) {
|
||||
@ -2687,16 +2669,11 @@ private:
|
||||
DEBUG(dbgs() << " to: " << SI << "\n");
|
||||
deleteIfTriviallyDead(OldPtr);
|
||||
|
||||
// Check whether we can speculate this select instruction, and if so
|
||||
// remember that fact and queue it up for another iteration after the
|
||||
// speculation occurs.
|
||||
if (isSafeSelectToSpeculate(SI, &DL)) {
|
||||
Pass.SpeculatableSelects.insert(&SI);
|
||||
IsUsedByRewrittenSpeculatableInstructions = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false; // Selects can't be promoted on their own.
|
||||
// Selects can't be promoted on their own, but often can be speculated. We
|
||||
// check the speculation outside of the rewriter so that we see the
|
||||
// fully-rewritten alloca.
|
||||
SelectUsers.insert(&SI);
|
||||
return true;
|
||||
}
|
||||
|
||||
};
|
||||
@ -3132,17 +3109,17 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
|
||||
<< "[" << BeginOffset << "," << EndOffset << ") to: " << *NewAI
|
||||
<< "\n");
|
||||
|
||||
// Track the high watermark on several worklists that are only relevant for
|
||||
// Track the high watermark on the worklist as it is only relevant for
|
||||
// promoted allocas. We will reset it to this point if the alloca is not in
|
||||
// fact scheduled for promotion.
|
||||
unsigned PPWOldSize = PostPromotionWorklist.size();
|
||||
unsigned SPOldSize = SpeculatablePHIs.size();
|
||||
unsigned SSOldSize = SpeculatableSelects.size();
|
||||
unsigned NumUses = 0;
|
||||
SmallPtrSet<PHINode *, 8> PHIUsers;
|
||||
SmallPtrSet<SelectInst *, 8> SelectUsers;
|
||||
|
||||
AllocaSliceRewriter Rewriter(*DL, S, *this, AI, *NewAI, BeginOffset,
|
||||
EndOffset, IsVectorPromotable,
|
||||
IsIntegerPromotable);
|
||||
IsIntegerPromotable, PHIUsers, SelectUsers);
|
||||
bool Promotable = true;
|
||||
for (ArrayRef<AllocaSlices::iterator>::const_iterator SUI = SplitUses.begin(),
|
||||
SUE = SplitUses.end();
|
||||
@ -3163,33 +3140,53 @@ bool SROA::rewritePartition(AllocaInst &AI, AllocaSlices &S,
|
||||
MaxUsesPerAllocaPartition =
|
||||
std::max<unsigned>(NumUses, MaxUsesPerAllocaPartition);
|
||||
|
||||
if (Promotable && !Rewriter.isUsedByRewrittenSpeculatableInstructions()) {
|
||||
DEBUG(dbgs() << " and queuing for promotion\n");
|
||||
PromotableAllocas.push_back(NewAI);
|
||||
} else if (NewAI != &AI ||
|
||||
(Promotable &&
|
||||
Rewriter.isUsedByRewrittenSpeculatableInstructions())) {
|
||||
// Now that we've processed all the slices in the new partition, check if any
|
||||
// PHIs or Selects would block promotion.
|
||||
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
|
||||
E = PHIUsers.end();
|
||||
I != E; ++I)
|
||||
if (!isSafePHIToSpeculate(**I, DL)) {
|
||||
Promotable = false;
|
||||
PHIUsers.clear();
|
||||
SelectUsers.clear();
|
||||
}
|
||||
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
|
||||
E = SelectUsers.end();
|
||||
I != E; ++I)
|
||||
if (!isSafeSelectToSpeculate(**I, DL)) {
|
||||
Promotable = false;
|
||||
PHIUsers.clear();
|
||||
SelectUsers.clear();
|
||||
}
|
||||
|
||||
if (Promotable) {
|
||||
if (PHIUsers.empty() && SelectUsers.empty()) {
|
||||
// Promote the alloca.
|
||||
PromotableAllocas.push_back(NewAI);
|
||||
} else {
|
||||
// If we have either PHIs or Selects to speculate, add them to those
|
||||
// worklists and re-queue the new alloca so that we promote in on the
|
||||
// next iteration.
|
||||
for (SmallPtrSetImpl<PHINode *>::iterator I = PHIUsers.begin(),
|
||||
E = PHIUsers.end();
|
||||
I != E; ++I)
|
||||
SpeculatablePHIs.insert(*I);
|
||||
for (SmallPtrSetImpl<SelectInst *>::iterator I = SelectUsers.begin(),
|
||||
E = SelectUsers.end();
|
||||
I != E; ++I)
|
||||
SpeculatableSelects.insert(*I);
|
||||
Worklist.insert(NewAI);
|
||||
}
|
||||
} else {
|
||||
// If we can't promote the alloca, iterate on it to check for new
|
||||
// refinements exposed by splitting the current alloca. Don't iterate on an
|
||||
// alloca which didn't actually change and didn't get promoted.
|
||||
//
|
||||
// Alternatively, if we could promote the alloca but have speculatable
|
||||
// instructions then we will speculate them after finishing our processing
|
||||
// of the original alloca. Mark the new one for re-visiting in the next
|
||||
// iteration so the speculated operations can be rewritten.
|
||||
//
|
||||
// FIXME: We should actually track whether the rewriter changed anything.
|
||||
Worklist.insert(NewAI);
|
||||
}
|
||||
if (NewAI != &AI)
|
||||
Worklist.insert(NewAI);
|
||||
|
||||
// Drop any post-promotion work items if promotion didn't happen.
|
||||
if (!Promotable) {
|
||||
// Drop any post-promotion work items if promotion didn't happen.
|
||||
while (PostPromotionWorklist.size() > PPWOldSize)
|
||||
PostPromotionWorklist.pop_back();
|
||||
while (SpeculatablePHIs.size() > SPOldSize)
|
||||
SpeculatablePHIs.pop_back();
|
||||
while (SpeculatableSelects.size() > SSOldSize)
|
||||
SpeculatableSelects.pop_back();
|
||||
}
|
||||
|
||||
return true;
|
||||
|
@ -1317,6 +1317,28 @@ define void @PR15805(i1 %a, i1 %b) {
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @PR15805.1(i1 %a, i1 %b) {
|
||||
; Same as the normal PR15805, but rigged to place the use before the def inside
|
||||
; of looping unreachable code. This helps ensure that we aren't sensitive to the
|
||||
; order in which the uses of the alloca are visited.
|
||||
;
|
||||
; CHECK-LABEL: @PR15805.1(
|
||||
; CHECK-NOT: alloca
|
||||
; CHECK: ret void
|
||||
|
||||
%c = alloca i64, align 8
|
||||
br label %exit
|
||||
|
||||
loop:
|
||||
%cond.in = select i1 undef, i64* %c, i64* %p.0.c
|
||||
%p.0.c = select i1 undef, i64* %c, i64* %c
|
||||
%cond = load i64* %cond.in, align 8
|
||||
br i1 undef, label %loop, label %exit
|
||||
|
||||
exit:
|
||||
ret void
|
||||
}
|
||||
|
||||
define void @PR16651.1(i8* %a) {
|
||||
; This test case caused a crash due to the volatile memcpy in combination with
|
||||
; lowering to integer loads and stores of a width other than that of the original
|
||||
|
Loading…
x
Reference in New Issue
Block a user