AArch64: use ldxp/stxp pair to implement 128-bit atomic loads.

The ARM ARM is clear that 128-bit loads are only guaranteed to have been atomic
if there has been a corresponding successful stxp. It's less clear for AArch32, so
I'm leaving that alone for now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@254524 91177308-0d34-0410-b5e6-96231b3b80d8
commit 57b1a9599b (parent 454061bf3a)
Author: Tim Northover
Date:   2015-12-02 18:12:57 +00:00

6 changed files with 57 additions and 38 deletions
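For illustration (not part of the commit): the user-visible consequence is that even a relaxed 128-bit atomic load on AArch64 must be lowered to an ldxp/stxp retry loop rather than a bare ldxp. A minimal C++ sketch, assuming a toolchain where the 16-byte atomic is lock-free; load128 is a hypothetical example function and __int128 is a compiler extension:

#include <atomic>

// Hypothetical example (not from this commit): a relaxed 128-bit atomic
// load. After this change, AArch64 lowers it through an ldxp/stxp loop,
// because a bare ldxp is not architecturally guaranteed to be atomic
// without a corresponding successful stxp.
__int128 load128(std::atomic<__int128> &v) {
  return v.load(std::memory_order_relaxed);
}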


@@ -130,10 +130,12 @@ public:
   /// support for these atomic instructions, and also have different options
   /// w.r.t. what they should expand to.
   enum class AtomicExpansionKind {
-    None,      // Don't expand the instruction.
-    LLSC,      // Expand the instruction into loadlinked/storeconditional; used
-               // by ARM/AArch64.
-    CmpXChg,   // Expand the instruction into cmpxchg; used by at least X86.
+    None,    // Don't expand the instruction.
+    LLSC,    // Expand the instruction into loadlinked/storeconditional; used
+             // by ARM/AArch64.
+    LLOnly,  // Expand the (load) instruction into just a load-linked, which has
+             // greater atomic guarantees than a normal load.
+    CmpXChg, // Expand the instruction into cmpxchg; used by at least X86.
   };
 
   static ISD::NodeType getExtendForContent(BooleanContent Content) {
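To make the intent of the widened enum concrete: for loads, LLSC now produces the full load-linked/store-conditional loop, while the new LLOnly preserves the previous behaviour of emitting just the load-linked. A self-contained sketch of the per-size policy a target hook expresses (chooseLoadExpansion is a hypothetical helper, not LLVM API; the enum is copied from the patch):

enum class AtomicExpansionKind { None, LLSC, LLOnly, CmpXChg };

// Hypothetical policy helper mirroring a shouldExpandAtomicLoadInIR-style
// hook: sizes where a bare load-exclusive is already single-copy atomic
// can use LLOnly; sizes where atomicity is only guaranteed by a paired
// store-exclusive need the full LLSC loop.
AtomicExpansionKind chooseLoadExpansion(unsigned SizeInBits) {
  if (SizeInBits == 128)
    return AtomicExpansionKind::LLSC;   // e.g. AArch64 ldxp must pair with stxp
  if (SizeInBits == 64)
    return AtomicExpansionKind::LLOnly; // e.g. ARM ldrexd is single-copy atomic
  return AtomicExpansionKind::None;     // an ordinary load is already atomic
}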


@@ -51,7 +51,9 @@ namespace {
     bool expandAtomicLoadToCmpXchg(LoadInst *LI);
     bool expandAtomicStore(StoreInst *SI);
     bool tryExpandAtomicRMW(AtomicRMWInst *AI);
-    bool expandAtomicRMWToLLSC(AtomicRMWInst *AI);
+    bool expandAtomicOpToLLSC(
+        Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+        std::function<Value *(IRBuilder<> &, Value *)> PerformOp);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *AI);
     bool simplifyIdempotentRMW(AtomicRMWInst *AI);
@@ -174,13 +176,15 @@ bool AtomicExpand::tryExpandAtomicLoad(LoadInst *LI) {
   switch (TLI->shouldExpandAtomicLoadInIR(LI)) {
   case TargetLoweringBase::AtomicExpansionKind::None:
     return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(
+        LI, LI->getPointerOperand(), LI->getOrdering(),
+        [](IRBuilder<> &Builder, Value *Loaded) { return Loaded; });
+  case TargetLoweringBase::AtomicExpansionKind::LLOnly:
     return expandAtomicLoadToLL(LI);
-  }
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
     return expandAtomicLoadToCmpXchg(LI);
-  }
   }
   llvm_unreachable("Unhandled case in tryExpandAtomicLoad");
 }
 
@@ -192,6 +196,7 @@ bool AtomicExpand::expandAtomicLoadToLL(LoadInst *LI) {
   // to be single-copy atomic by ARM is an ldrexd (A3.5.3).
   Value *Val =
       TLI->emitLoadLinked(Builder, LI->getPointerOperand(), LI->getOrdering());
+  TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
 
   LI->replaceAllUsesWith(Val);
   LI->eraseFromParent();
@@ -245,20 +250,6 @@ static void createCmpXchgInstFun(IRBuilder<> &Builder, Value *Addr,
   NewLoaded = Builder.CreateExtractValue(Pair, 0, "newloaded");
 }
 
-bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
-  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
-  case TargetLoweringBase::AtomicExpansionKind::None:
-    return false;
-  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
-    return expandAtomicRMWToLLSC(AI);
-  }
-  case TargetLoweringBase::AtomicExpansionKind::CmpXChg: {
-    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
-  }
-  }
-  llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
-}
-
 /// Emit IR to implement the given atomicrmw operation on values in registers,
 /// returning the new value.
 static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
@@ -296,10 +287,28 @@ static Value *performAtomicOp(AtomicRMWInst::BinOp Op, IRBuilder<> &Builder,
   }
 }
 
-bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
-  AtomicOrdering MemOpOrder = AI->getOrdering();
-  Value *Addr = AI->getPointerOperand();
-  BasicBlock *BB = AI->getParent();
+bool AtomicExpand::tryExpandAtomicRMW(AtomicRMWInst *AI) {
+  switch (TLI->shouldExpandAtomicRMWInIR(AI)) {
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC:
+    return expandAtomicOpToLLSC(AI, AI->getPointerOperand(), AI->getOrdering(),
+                                [&](IRBuilder<> &Builder, Value *Loaded) {
+                                  return performAtomicOp(AI->getOperation(),
+                                                         Builder, Loaded,
+                                                         AI->getValOperand());
+                                });
+  case TargetLoweringBase::AtomicExpansionKind::CmpXChg:
+    return expandAtomicRMWToCmpXchg(AI, createCmpXchgInstFun);
+  default:
+    llvm_unreachable("Unhandled case in tryExpandAtomicRMW");
+  }
+}
+
+bool AtomicExpand::expandAtomicOpToLLSC(
+    Instruction *I, Value *Addr, AtomicOrdering MemOpOrder,
+    std::function<Value *(IRBuilder<> &, Value *)> PerformOp) {
+  BasicBlock *BB = I->getParent();
   Function *F = BB->getParent();
   LLVMContext &Ctx = F->getContext();
 
@@ -317,11 +326,11 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   //     atomicrmw.end:
   //         fence?
   //     [...]
-  BasicBlock *ExitBB = BB->splitBasicBlock(AI->getIterator(), "atomicrmw.end");
+  BasicBlock *ExitBB = BB->splitBasicBlock(I->getIterator(), "atomicrmw.end");
   BasicBlock *LoopBB = BasicBlock::Create(Ctx, "atomicrmw.start", F, ExitBB);
 
-  // This grabs the DebugLoc from AI.
-  IRBuilder<> Builder(AI);
+  // This grabs the DebugLoc from I.
+  IRBuilder<> Builder(I);
 
   // The split call above "helpfully" added a branch at the end of BB (to the
   // wrong place), but we might want a fence too. It's easiest to just remove
@@ -334,8 +343,7 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   Builder.SetInsertPoint(LoopBB);
   Value *Loaded = TLI->emitLoadLinked(Builder, Addr, MemOpOrder);
-  Value *NewVal =
-      performAtomicOp(AI->getOperation(), Builder, Loaded, AI->getValOperand());
+  Value *NewVal = PerformOp(Builder, Loaded);
 
   Value *StoreSuccess =
       TLI->emitStoreConditional(Builder, NewVal, Addr, MemOpOrder);
@@ -345,8 +353,8 @@ bool AtomicExpand::expandAtomicRMWToLLSC(AtomicRMWInst *AI) {
   Builder.SetInsertPoint(ExitBB, ExitBB->begin());
 
-  AI->replaceAllUsesWith(Loaded);
-  AI->eraseFromParent();
+  I->replaceAllUsesWith(Loaded);
+  I->eraseFromParent();
 
   return true;
 }
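The rewritten expansion has a simple scalar analogue, sketched below with illustrative stand-ins (loadLinked/storeConditional are not LLVM API, and their plain-memory bodies exist only so the sketch compiles; real codegen emits ldxp/stxp on AArch64 and ldrexd/strexd on ARM). For an atomic load the PerformOp callback is the identity, so the loop writes back exactly what it read, and the successful store-conditional is what makes the wide read atomic:

#include <functional>

// Illustrative stand-ins for the emitted load-linked/store-conditional.
// These are NOT atomic; they only model the control flow.
template <typename T> T loadLinked(T *Addr) { return *Addr; }
template <typename T> bool storeConditional(T *Addr, T Val) {
  *Addr = Val;
  return true; // a real store-conditional can fail, forcing a retry
}

// Scalar analogue of the atomicrmw.start/atomicrmw.end loop built by
// expandAtomicOpToLLSC: load-linked, apply PerformOp, store-conditional,
// retry on failure, and yield the old value.
template <typename T>
T llscLoop(T *Addr, std::function<T(T)> PerformOp) {
  for (;;) {
    T Loaded = loadLinked(Addr);
    if (storeConditional(Addr, PerformOp(Loaded)))
      return Loaded;
  }
}

// Usage: an atomic load passes the identity; an atomicrmw add would pass
// [Incr](long V) { return V + Incr; }.
// e.g. long Old = llscLoop<long>(&x, [](long V) { return V; });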


@@ -11891,7 +11891,7 @@ bool ARMTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
 
 TargetLowering::AtomicExpansionKind
 ARMTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   unsigned Size = LI->getType()->getPrimitiveSizeInBits();
-  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLSC
+  return ((Size == 64) && !Subtarget->isMClass()) ? AtomicExpansionKind::LLOnly
                                                   : AtomicExpansionKind::None;
 }


@@ -2863,7 +2863,7 @@ TargetLowering::AtomicExpansionKind
 HexagonTargetLowering::shouldExpandAtomicLoadInIR(LoadInst *LI) const {
   // Do not expand loads and stores that don't exceed 64 bits.
   return LI->getType()->getPrimitiveSizeInBits() > 64
-             ? AtomicExpansionKind::LLSC
+             ? AtomicExpansionKind::LLOnly
              : AtomicExpansionKind::None;
 }
 


@@ -173,10 +173,13 @@ define i128 @atomic_load_seq_cst(i128* %p) {
    ret i128 %r
 }
-define i128 @atomic_load_relaxed(i128* %p) {
+define i128 @atomic_load_relaxed(i64, i64, i128* %p) {
 ; CHECK-LABEL: atomic_load_relaxed:
 ; CHECK-NOT: dmb
-; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x0]
+; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
+; CHECK: ldxp [[LO:x[0-9]+]], [[HI:x[0-9]+]], [x2]
+; CHECK-NEXT: stxp [[SUCCESS:w[0-9]+]], [[LO]], [[HI]], [x2]
+; CHECK: cbnz [[SUCCESS]], [[LABEL]]
 ; CHECK-NOT: dmb
 
    %r = load atomic i128, i128* %p monotonic, align 16
    ret i128 %r


@@ -208,10 +208,16 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 define i64 @test8(i64* %ptr) {
 ; CHECK-LABEL: test8:
 ; CHECK: ldrexd [[REG1:(r[0-9]?[02468])]], [[REG2:(r[0-9]?[13579])]]
 ; CHECK-NOT: strexd
+; CHECK: clrex
+; CHECK-NOT: strexd
 ; CHECK: dmb {{ish$}}
 ; CHECK-THUMB-LABEL: test8:
 ; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
 ; CHECK-THUMB-NOT: strexd
+; CHECK-THUMB: clrex
+; CHECK-THUMB-NOT: strexd
 ; CHECK-THUMB: dmb {{ish$}}
   %r = load atomic i64, i64* %ptr seq_cst, align 8