[LICM] Make store promotion work in the face of unordered atomics

Extend our store promotion code to deal with unordered atomic accesses. Ordered atomics continue to be unhandled.

Most of the change is straight-forward, the only complicated bit is in the reasoning around mixing of atomic and non-atomic memory access. Rather than trying to reason about the complex semantics in these cases, I simply disallowed promotion when both atomic and non-atomic accesses are present. This is conservatively correct.

It seems really tempting to just promote all access to atomics, but the original accesses might have been conditional. Since we can't lower an arbitrary atomic type, it might not be safe to promote all access to atomic. Consider a loop like the following:
while(b) {
  load i128 ...
  if (can lower i128 atomic)
    store atomic i128 ...
  else
    store i128
}

It could be there's no race on the location and thus the code is perfectly well defined even if we can't lower a i128 atomically. 

It's not clear we need to be this conservative - arguably the program above is brocken since it can't be lowered unless the branch is folded - but I didn't want to have to fix any fallout which might result.

Differential Revision: https://reviews.llvm.org/D15592



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@295015 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Philip Reames 2017-02-14 01:38:31 +00:00
parent 75ebce9b55
commit 33ee99ac22
3 changed files with 198 additions and 7 deletions

View File

@ -917,6 +917,7 @@ class LoopPromoter : public LoadAndStorePromoter {
LoopInfo &LI;
DebugLoc DL;
int Alignment;
bool UnorderedAtomic;
AAMDNodes AATags;
Value *maybeInsertLCSSAPHI(Value *V, BasicBlock *BB) const {
@ -940,10 +941,11 @@ public:
SmallVectorImpl<BasicBlock *> &LEB,
SmallVectorImpl<Instruction *> &LIP, PredIteratorCache &PIC,
AliasSetTracker &ast, LoopInfo &li, DebugLoc dl, int alignment,
const AAMDNodes &AATags)
bool UnorderedAtomic, const AAMDNodes &AATags)
: LoadAndStorePromoter(Insts, S), SomePtr(SP), PointerMustAliases(PMA),
LoopExitBlocks(LEB), LoopInsertPts(LIP), PredCache(PIC), AST(ast),
LI(li), DL(std::move(dl)), Alignment(alignment), AATags(AATags) {}
LI(li), DL(std::move(dl)), Alignment(alignment),
UnorderedAtomic(UnorderedAtomic),AATags(AATags) {}
bool isInstInList(Instruction *I,
const SmallVectorImpl<Instruction *> &) const override {
@ -967,6 +969,8 @@ public:
Value *Ptr = maybeInsertLCSSAPHI(SomePtr, ExitBlock);
Instruction *InsertPos = LoopInsertPts[i];
StoreInst *NewSI = new StoreInst(LiveInValue, Ptr, InsertPos);
if (UnorderedAtomic)
NewSI->setOrdering(AtomicOrdering::Unordered);
NewSI->setAlignment(Alignment);
NewSI->setDebugLoc(DL);
if (AATags)
@ -1057,6 +1061,9 @@ bool llvm::promoteLoopAccessesToScalars(
// We start with an alignment of one and try to find instructions that allow
// us to prove better alignment.
unsigned Alignment = 1;
// Keep track of which types of access we see
bool SawUnorderedAtomic = false;
bool SawNotAtomic = false;
AAMDNodes AATags;
const DataLayout &MDL = Preheader->getModule()->getDataLayout();
@ -1114,8 +1121,11 @@ bool llvm::promoteLoopAccessesToScalars(
// it.
if (LoadInst *Load = dyn_cast<LoadInst>(UI)) {
assert(!Load->isVolatile() && "AST broken");
if (!Load->isSimple())
if (!Load->isUnordered())
return false;
SawUnorderedAtomic |= Load->isAtomic();
SawNotAtomic |= !Load->isAtomic();
if (!DereferenceableInPH)
DereferenceableInPH = isSafeToExecuteUnconditionally(
@ -1126,9 +1136,12 @@ bool llvm::promoteLoopAccessesToScalars(
if (UI->getOperand(1) != ASIV)
continue;
assert(!Store->isVolatile() && "AST broken");
if (!Store->isSimple())
if (!Store->isUnordered())
return false;
SawUnorderedAtomic |= Store->isAtomic();
SawNotAtomic |= !Store->isAtomic();
// If the store is guaranteed to execute, both properties are satisfied.
// We may want to check if a store is guaranteed to execute even if we
// already know that promotion is safe, since it may have higher
@ -1181,6 +1194,12 @@ bool llvm::promoteLoopAccessesToScalars(
}
}
// If we found both an unordered atomic instruction and a non-atomic memory
// access, bail. We can't blindly promote non-atomic to atomic since we
// might not be able to lower the result. We can't downgrade since that
// would violate memory model. Also, align 0 is an error for atomics.
if (SawUnorderedAtomic && SawNotAtomic)
return false;
// If we couldn't prove we can hoist the load, bail.
if (!DereferenceableInPH)
@ -1224,12 +1243,15 @@ bool llvm::promoteLoopAccessesToScalars(
SmallVector<PHINode *, 16> NewPHIs;
SSAUpdater SSA(&NewPHIs);
LoopPromoter Promoter(SomePtr, LoopUses, SSA, PointerMustAliases, ExitBlocks,
InsertPts, PIC, *CurAST, *LI, DL, Alignment, AATags);
InsertPts, PIC, *CurAST, *LI, DL, Alignment,
SawUnorderedAtomic, AATags);
// Set up the preheader to have a definition of the value. It is the live-out
// value from the preheader that uses in the loop will use.
LoadInst *PreheaderLoad = new LoadInst(
SomePtr, SomePtr->getName() + ".promoted", Preheader->getTerminator());
if (SawUnorderedAtomic)
PreheaderLoad->setOrdering(AtomicOrdering::Unordered);
PreheaderLoad->setAlignment(Alignment);
PreheaderLoad->setDebugLoc(DL);
if (AATags)

View File

@ -60,8 +60,7 @@ end:
; CHECK-NEXT: br label %loop
}
; Don't try to "sink" unordered stores yet; it is legal, but the machinery
; isn't there.
; We can sink an unordered store
define i32 @test4(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
entry:
br label %loop
@ -75,6 +74,149 @@ loop:
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test4(
; CHECK-LABEL: loop:
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NOT: store
; CHECK-LABEL: end:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %vala
; CHECK: store atomic i32 %[[LCSSAPHI]], i32* %x unordered, align 4
}
; We currently don't handle ordered atomics.
define i32 @test5(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
entry:
br label %loop
loop:
%vala = load atomic i32, i32* %y monotonic, align 4
store atomic i32 %vala, i32* %x release, align 4
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test5(
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NEXT: store atomic
}
; We currently don't touch volatiles
define i32 @test6(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
entry:
br label %loop
loop:
%vala = load atomic i32, i32* %y monotonic, align 4
store volatile i32 %vala, i32* %x, align 4
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test6(
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NEXT: store volatile
}
; We currently don't touch volatiles
define i32 @test6b(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
entry:
br label %loop
loop:
%vala = load atomic i32, i32* %y monotonic, align 4
store atomic volatile i32 %vala, i32* %x unordered, align 4
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test6b(
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NEXT: store atomic volatile
}
; Mixing unorder atomics and normal loads/stores is
; current unimplemented
define i32 @test7(i32* nocapture noalias %x, i32* nocapture %y) nounwind uwtable ssp {
entry:
br label %loop
loop:
store i32 5, i32* %x
%vala = load atomic i32, i32* %y monotonic, align 4
store atomic i32 %vala, i32* %x unordered, align 4
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test7(
; CHECK: store i32 5, i32* %x
; CHECK-NEXT: load atomic i32, i32* %y
; CHECK-NEXT: store atomic i32
}
; Three provably noalias locations - we can sink normal and unordered, but
; not monotonic
define i32 @test7b(i32* nocapture noalias %x, i32* nocapture %y, i32* noalias nocapture %z) nounwind uwtable ssp {
entry:
br label %loop
loop:
store i32 5, i32* %x
%vala = load atomic i32, i32* %y monotonic, align 4
store atomic i32 %vala, i32* %z unordered, align 4
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test7b(
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-LABEL: end:
; CHECK: store i32 5, i32* %x
; CHECK: store atomic i32 %{{.+}}, i32* %z unordered, align 4
}
define i32 @test8(i32* nocapture noalias %x, i32* nocapture %y) {
entry:
br label %loop
loop:
%vala = load atomic i32, i32* %y monotonic, align 4
store atomic i32 %vala, i32* %x unordered, align 4
fence release
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test8(
; CHECK-LABEL: loop:
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NEXT: store atomic
; CHECK-NEXT: fence
}
; Exact semantics of monotonic accesses are a bit vague in the C++ spec,
; for the moment, be conservative and don't touch them.
define i32 @test9(i32* nocapture noalias %x, i32* nocapture %y) {
entry:
br label %loop
loop:
%vala = load atomic i32, i32* %y monotonic, align 4
store atomic i32 %vala, i32* %x monotonic, align 4
%exitcond = icmp ne i32 %vala, 0
br i1 %exitcond, label %end, label %loop
end:
ret i32 %vala
; CHECK-LABEL: define i32 @test9(
; CHECK-LABEL: loop:
; CHECK: load atomic i32, i32* %y monotonic
; CHECK-NEXT: store atomic i32 %vala, i32* %x monotonic, align 4
}

View File

@ -378,6 +378,33 @@ exit:
ret i32 %ret
}
define void @test10(i32 %i) {
Entry:
br label %Loop
; CHECK-LABEL: @test10(
; CHECK: Entry:
; CHECK-NEXT: load atomic i32, i32* @X unordered, align 4
; CHECK-NEXT: br label %Loop
Loop: ; preds = %Loop, %0
%j = phi i32 [ 0, %Entry ], [ %Next, %Loop ] ; <i32> [#uses=1]
%x = load atomic i32, i32* @X unordered, align 4
%x2 = add i32 %x, 1
store atomic i32 %x2, i32* @X unordered, align 4
%Next = add i32 %j, 1
%cond = icmp eq i32 %Next, 0
br i1 %cond, label %Out, label %Loop
Out:
ret void
; CHECK: Out:
; CHECK-NEXT: %[[LCSSAPHI:.*]] = phi i32 [ %x2
; CHECK-NEXT: store atomic i32 %[[LCSSAPHI]], i32* @X unordered, align 4
; CHECK-NEXT: ret void
}
!0 = !{!4, !4, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA"}