mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-03 01:12:53 +00:00
[AArch64] Emit clrex in the expanded cmpxchg fail block.
In the comparison failure block of a cmpxchg expansion, the initial ldrex/ldxr will not be followed by a matching strex/stxr. On ARM/AArch64, this unnecessarily ties up the execution monitor, which might have a negative performance impact on some uarchs. Instead, release the monitor in the failure block. The clrex instruction was designed for this: use it. Also see ARMARM v8-A B2.10.2: "Exclusive access instructions and Shareable memory locations". Differential Revision: http://reviews.llvm.org/D13033 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@248291 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
c8a4540af1
commit
ee629d8395
@ -1097,6 +1097,14 @@ public:
|
||||
}
|
||||
/// @}
|
||||
|
||||
// Emits code that executes when the comparison result in the ll/sc
|
||||
// expansion of a cmpxchg instruction is such that the store-conditional will
|
||||
// not execute. This makes it possible to balance out the load-linked with
|
||||
// a dedicated instruction, if desired.
|
||||
// E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would
|
||||
// be unnecessarily held, except if clrex, inserted by this hook, is executed.
|
||||
virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {}
|
||||
|
||||
/// Returns true if the given (atomic) store should be expanded by the
|
||||
/// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input.
|
||||
virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const {
|
||||
|
@ -374,7 +374,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
||||
// %loaded = @load.linked(%addr)
|
||||
// %should_store = icmp eq %loaded, %desired
|
||||
// br i1 %should_store, label %cmpxchg.trystore,
|
||||
// label %cmpxchg.failure
|
||||
// label %cmpxchg.nostore
|
||||
// cmpxchg.trystore:
|
||||
// %stored = @store_conditional(%new, %addr)
|
||||
// %success = icmp eq i32 %stored, 0
|
||||
@ -382,6 +382,9 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
||||
// cmpxchg.success:
|
||||
// fence?
|
||||
// br label %cmpxchg.end
|
||||
// cmpxchg.nostore:
|
||||
// @load_linked_fail_balance()?
|
||||
// br label %cmpxchg.failure
|
||||
// cmpxchg.failure:
|
||||
// fence?
|
||||
// br label %cmpxchg.end
|
||||
@ -392,7 +395,8 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
||||
// [...]
|
||||
BasicBlock *ExitBB = BB->splitBasicBlock(CI, "cmpxchg.end");
|
||||
auto FailureBB = BasicBlock::Create(Ctx, "cmpxchg.failure", F, ExitBB);
|
||||
auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, FailureBB);
|
||||
auto NoStoreBB = BasicBlock::Create(Ctx, "cmpxchg.nostore", F, FailureBB);
|
||||
auto SuccessBB = BasicBlock::Create(Ctx, "cmpxchg.success", F, NoStoreBB);
|
||||
auto TryStoreBB = BasicBlock::Create(Ctx, "cmpxchg.trystore", F, SuccessBB);
|
||||
auto LoopBB = BasicBlock::Create(Ctx, "cmpxchg.start", F, TryStoreBB);
|
||||
|
||||
@ -416,7 +420,7 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
||||
|
||||
// If the cmpxchg doesn't actually need any ordering when it fails, we can
|
||||
// jump straight past that fence instruction (if it exists).
|
||||
Builder.CreateCondBr(ShouldStore, TryStoreBB, FailureBB);
|
||||
Builder.CreateCondBr(ShouldStore, TryStoreBB, NoStoreBB);
|
||||
|
||||
Builder.SetInsertPoint(TryStoreBB);
|
||||
Value *StoreSuccess = TLI->emitStoreConditional(
|
||||
@ -432,6 +436,13 @@ bool AtomicExpand::expandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
|
||||
/*IsLoad=*/true);
|
||||
Builder.CreateBr(ExitBB);
|
||||
|
||||
Builder.SetInsertPoint(NoStoreBB);
|
||||
// In the failing case, where we don't execute the store-conditional, the
|
||||
// target might want to balance out the load-linked with a dedicated
|
||||
// instruction (e.g., on ARM, clearing the exclusive monitor).
|
||||
TLI->emitAtomicCmpXchgNoStoreLLBalance(Builder);
|
||||
Builder.CreateBr(FailureBB);
|
||||
|
||||
Builder.SetInsertPoint(FailureBB);
|
||||
TLI->emitTrailingFence(Builder, FailureOrder, /*IsStore=*/true,
|
||||
/*IsLoad=*/true);
|
||||
|
@ -9682,6 +9682,13 @@ Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
|
||||
cast<PointerType>(Addr->getType())->getElementType());
|
||||
}
|
||||
|
||||
void AArch64TargetLowering::emitAtomicCmpXchgNoStoreLLBalance(
|
||||
IRBuilder<> &Builder) const {
|
||||
Module *M = Builder.GetInsertBlock()->getParent()->getParent();
|
||||
Builder.CreateCall(
|
||||
llvm::Intrinsic::getDeclaration(M, Intrinsic::aarch64_clrex));
|
||||
}
|
||||
|
||||
Value *AArch64TargetLowering::emitStoreConditional(IRBuilder<> &Builder,
|
||||
Value *Val, Value *Addr,
|
||||
AtomicOrdering Ord) const {
|
||||
|
@ -348,6 +348,8 @@ public:
|
||||
Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val,
|
||||
Value *Addr, AtomicOrdering Ord) const override;
|
||||
|
||||
void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const override;
|
||||
|
||||
TargetLoweringBase::AtomicExpansionKind
|
||||
shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
|
||||
bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
|
||||
|
@ -2,13 +2,17 @@
|
||||
|
||||
define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
|
||||
; CHECK-LABEL: val_compare_and_swap:
|
||||
; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
|
||||
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
|
||||
; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[RESULT]], w1
|
||||
; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK-NEXT: [[LABEL2]]:
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
|
||||
%val = extractvalue { i32, i1 } %pair, 0
|
||||
ret i32 %val
|
||||
@ -17,13 +21,16 @@ define i32 @val_compare_and_swap(i32* %p, i32 %cmp, i32 %new) #0 {
|
||||
define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
|
||||
; CHECK-LABEL: val_compare_and_swap_from_load:
|
||||
; CHECK-NEXT: ldr [[NEW:w[0-9]+]], [x2]
|
||||
; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
|
||||
; CHECK-NEXT: cmp [[RESULT]], w1
|
||||
; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], [[NEW]], [x0]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK-NEXT: [[LABEL2]]:
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
%new = load i32, i32* %pnew
|
||||
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acquire acquire
|
||||
%val = extractvalue { i32, i1 } %pair, 0
|
||||
@ -32,13 +39,17 @@ define i32 @val_compare_and_swap_from_load(i32* %p, i32 %cmp, i32* %pnew) #0 {
|
||||
|
||||
define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
|
||||
; CHECK-LABEL: val_compare_and_swap_rel:
|
||||
; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x0]
|
||||
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
|
||||
; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK-NEXT: ldaxr [[RESULT:w[0-9]+]], [x[[ADDR]]
|
||||
; CHECK-NEXT: cmp [[RESULT]], w1
|
||||
; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x0]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK-NEXT: [[LABEL2]]:
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stlxr [[SCRATCH_REG:w[0-9]+]], w2, [x[[ADDR]]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
%pair = cmpxchg i32* %p, i32 %cmp, i32 %new acq_rel monotonic
|
||||
%val = extractvalue { i32, i1 } %pair, 0
|
||||
ret i32 %val
|
||||
@ -47,13 +58,16 @@ define i32 @val_compare_and_swap_rel(i32* %p, i32 %cmp, i32 %new) #0 {
|
||||
define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
|
||||
; CHECK-LABEL: val_compare_and_swap_64:
|
||||
; CHECK-NEXT: mov x[[ADDR:[0-9]+]], x0
|
||||
; CHECK-NEXT: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK-NEXT: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK-NEXT: ldxr [[RESULT:x[0-9]+]], [x[[ADDR]]]
|
||||
; CHECK-NEXT: cmp [[RESULT]], x1
|
||||
; CHECK-NEXT: b.ne [[LABEL2:.?LBB[0-9]+_[0-9]+]]
|
||||
; CHECK-NEXT: b.ne [[FAILBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: stxr [[SCRATCH_REG:w[0-9]+]], x2, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK-NEXT: [[LABEL2]]:
|
||||
; CHECK-NEXT: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK-NEXT: b [[EXITBB:.?LBB[0-9_]+]]
|
||||
; CHECK-NEXT: [[FAILBB]]:
|
||||
; CHECK-NEXT: clrex
|
||||
; CHECK-NEXT: [[EXITBB]]:
|
||||
%pair = cmpxchg i64* %p, i64 %cmp, i64 %new monotonic monotonic
|
||||
%val = extractvalue { i64, i1 } %pair, 0
|
||||
ret i64 %val
|
||||
@ -61,13 +75,13 @@ define i64 @val_compare_and_swap_64(i64* %p, i64 %cmp, i64 %new) #0 {
|
||||
|
||||
define i32 @fetch_and_nand(i32* %p) #0 {
|
||||
; CHECK-LABEL: fetch_and_nand:
|
||||
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK: ldxr w[[DEST_REG:[0-9]+]], [x0]
|
||||
; CHECK: mvn [[TMP_REG:w[0-9]+]], w[[DEST_REG]]
|
||||
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], [[TMP_REG]], #0xfffffff8
|
||||
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
|
||||
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK: mov x0, x[[DEST_REG]]
|
||||
%val = atomicrmw nand i32* %p, i32 7 release
|
||||
ret i32 %val
|
||||
@ -76,12 +90,12 @@ define i32 @fetch_and_nand(i32* %p) #0 {
|
||||
define i64 @fetch_and_nand_64(i64* %p) #0 {
|
||||
; CHECK-LABEL: fetch_and_nand_64:
|
||||
; CHECK: mov x[[ADDR:[0-9]+]], x0
|
||||
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK: ldaxr x[[DEST_REG:[0-9]+]], [x[[ADDR]]]
|
||||
; CHECK: mvn w[[TMP_REG:[0-9]+]], w[[DEST_REG]]
|
||||
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], x[[TMP_REG]], #0xfffffffffffffff8
|
||||
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
|
||||
%val = atomicrmw nand i64* %p, i64 7 acq_rel
|
||||
ret i64 %val
|
||||
@ -90,12 +104,12 @@ define i64 @fetch_and_nand_64(i64* %p) #0 {
|
||||
define i32 @fetch_and_or(i32* %p) #0 {
|
||||
; CHECK-LABEL: fetch_and_or:
|
||||
; CHECK: movz [[OLDVAL_REG:w[0-9]+]], #0x5
|
||||
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK: ldaxr w[[DEST_REG:[0-9]+]], [x0]
|
||||
; CHECK: orr [[SCRATCH2_REG:w[0-9]+]], w[[DEST_REG]], [[OLDVAL_REG]]
|
||||
; CHECK-NOT: stlxr [[SCRATCH2_REG]], [[SCRATCH2_REG]]
|
||||
; CHECK: stlxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x0]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
; CHECK: mov x0, x[[DEST_REG]]
|
||||
%val = atomicrmw or i32* %p, i32 5 seq_cst
|
||||
ret i32 %val
|
||||
@ -104,11 +118,11 @@ define i32 @fetch_and_or(i32* %p) #0 {
|
||||
define i64 @fetch_and_or_64(i64* %p) #0 {
|
||||
; CHECK: fetch_and_or_64:
|
||||
; CHECK: mov x[[ADDR:[0-9]+]], x0
|
||||
; CHECK: [[LABEL:.?LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK: [[TRYBB:.?LBB[0-9_]+]]:
|
||||
; CHECK: ldxr [[DEST_REG:x[0-9]+]], [x[[ADDR]]]
|
||||
; CHECK: orr [[SCRATCH2_REG:x[0-9]+]], [[DEST_REG]], #0x7
|
||||
; CHECK: stxr [[SCRATCH_REG:w[0-9]+]], [[SCRATCH2_REG]], [x[[ADDR]]]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[LABEL]]
|
||||
; CHECK: cbnz [[SCRATCH_REG]], [[TRYBB]]
|
||||
%val = atomicrmw or i64* %p, i64 7 monotonic
|
||||
ret i64 %val
|
||||
}
|
||||
|
@ -893,6 +893,8 @@ define i8 @test_atomic_cmpxchg_i8(i8 %wanted, i8 %new) nounwind {
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; CHECK: stxrb [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK: [[GET_OUT]]:
|
||||
; CHECK: clrex
|
||||
; CHECK-NOT: dmb
|
||||
|
||||
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
|
||||
@ -916,6 +918,8 @@ define i16 @test_atomic_cmpxchg_i16(i16 %wanted, i16 %new) nounwind {
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; CHECK: stlxrh [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK: [[GET_OUT]]:
|
||||
; CHECK: clrex
|
||||
; CHECK-NOT: dmb
|
||||
|
||||
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
|
||||
@ -927,21 +931,21 @@ define i32 @test_atomic_cmpxchg_i32(i32 %wanted, i32 %new) nounwind {
|
||||
%pair = cmpxchg i32* @var32, i32 %wanted, i32 %new release monotonic
|
||||
%old = extractvalue { i32, i1 } %pair, 0
|
||||
|
||||
; CHECK: mov {{[xw]}}[[WANTED:[0-9]+]], {{[xw]}}0
|
||||
|
||||
; CHECK-NOT: dmb
|
||||
; CHECK: adrp [[TMPADDR:x[0-9]+]], var32
|
||||
; CHECK: add x[[ADDR:[0-9]+]], [[TMPADDR]], {{#?}}:lo12:var32
|
||||
|
||||
; CHECK: [[STARTAGAIN:.LBB[0-9]+_[0-9]+]]:
|
||||
; CHECK: ldxr w[[OLD:[0-9]+]], [x[[ADDR]]]
|
||||
; w0 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-NEXT: cmp w[[OLD]], w0
|
||||
; CHECK-NEXT: cmp w[[OLD]], w[[WANTED]]
|
||||
; CHECK-NEXT: b.ne [[GET_OUT:.LBB[0-9]+_[0-9]+]]
|
||||
; CHECK: stlxr [[STATUS:w[0-9]+]], {{w[0-9]+}}, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK: [[GET_OUT]]:
|
||||
; CHECK: clrex
|
||||
; CHECK-NOT: dmb
|
||||
|
||||
; CHECK: mov {{[xw]}}0, {{[xw]}}[[OLD]]
|
||||
ret i32 %old
|
||||
}
|
||||
|
||||
@ -963,6 +967,8 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
|
||||
; As above, w1 is a reasonable guess.
|
||||
; CHECK: stxr [[STATUS:w[0-9]+]], x1, [x[[ADDR]]]
|
||||
; CHECK-NEXT: cbnz [[STATUS]], [[STARTAGAIN]]
|
||||
; CHECK: [[GET_OUT]]:
|
||||
; CHECK: clrex
|
||||
; CHECK-NOT: dmb
|
||||
|
||||
; CHECK: str x[[OLD]],
|
||||
|
Loading…
Reference in New Issue
Block a user