[AtomicExpandPass]: Add a hook for custom cmpxchg expansion in IR

This involves changing the shouldExpandAtomicCmpXchgInIR interface: previously
the hook returned bool, but it now returns AtomicExpansionKind. I have updated
the in-tree backends that use this hook (ARM, AArch64, Hexagon), so they see
no functional change.

This hook allows targets to select how a given cmpxchg is to be expanded. 
D48131 uses this to expand part-word cmpxchg to a target-specific intrinsic.
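
To illustrate the new contract, here is a minimal sketch of what an override
looks like after this change, for a hypothetical out-of-tree backend (the
class name MyTargetLowering and the -O0 heuristic are invented for the
example; the pattern mirrors the in-tree updates below):

    // Before this patch the hook could only answer yes/no:
    //   bool MyTargetLowering::shouldExpandAtomicCmpXchgInIR(
    //       AtomicCmpXchgInst *AI) const {
    //     return true; // expand to an LL/SC loop in IR
    //   }

    // After this patch the hook names the expansion strategy instead.
    TargetLowering::AtomicExpansionKind
    MyTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
      // Equivalent of the old "return false": leave the cmpxchg for codegen.
      if (getTargetMachine().getOptLevel() == 0)
        return AtomicExpansionKind::None;
      // Equivalent of the old "return true": expand to LL/SC in IR.
      return AtomicExpansionKind::LLSC;
    }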

See my associated RFC for more info on the motivation for this change 
<http://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html>.

Differential Revision: https://reviews.llvm.org/D48130


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342550 91177308-0d34-0410-b5e6-96231b3b80d8
Author: Alex Bradbury
Date: 2018-09-19 14:51:42 +00:00
Parent: 80b0618b4c
Commit: 490f68fb29
8 changed files with 66 additions and 29 deletions

include/llvm/CodeGen/TargetLowering.h

@@ -1574,6 +1574,15 @@ public:
     llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
   }
 
+  /// Perform a masked cmpxchg using a target-specific intrinsic. This
+  /// represents the core LL/SC loop which will be lowered at a late stage by
+  /// the backend.
+  virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
+      IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+      Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+    llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
+  }
+
   /// Inserts in the IR a target-specific intrinsic specifying a fence.
   /// It is called by AtomicExpandPass before expanding an
   ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
@@ -1650,11 +1659,11 @@ public:
     return AtomicExpansionKind::None;
   }
 
-  /// Returns true if the given atomic cmpxchg should be expanded by the
-  /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence
-  /// (through emitLoadLinked() and emitStoreConditional()).
-  virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
-    return false;
+  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
+  /// AtomicExpand pass.
+  virtual AtomicExpansionKind
+  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+    return AtomicExpansionKind::None;
   }
 
   /// Returns how the IR-level AtomicExpand pass should expand the given
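
The new emitMaskedAtomicCmpXchgIntrinsic hook above is deliberately left
unimplemented here; D48131 adds the first in-tree implementation. A rough
sketch of the intended shape for a hypothetical target follows (the intrinsic
name, operand encoding, and use of getOrInsertFunction are illustrative
assumptions, not part of this patch; a real target would call
Intrinsic::getDeclaration with an intrinsic ID registered in its .td files):

    Value *MyTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
        IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
        Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
      Module *M = Builder.GetInsertBlock()->getModule();
      Type *Int32Ty = Type::getInt32Ty(M->getContext());
      // Pass the ordering through as an immediate so the late LL/SC lowering
      // can choose the right barriers.
      Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
      // Hypothetical declaration; stands in for a target intrinsic that is
      // expanded to the core LL/SC loop late in the backend.
      Constant *Fn = M->getOrInsertFunction(
          "mytarget.masked.cmpxchg.i32", Int32Ty, AlignedAddr->getType(),
          Int32Ty, Int32Ty, Int32Ty, Int32Ty);
      return Builder.CreateCall(Fn, {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
    }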

lib/CodeGen/AtomicExpandPass.cpp

@@ -98,6 +98,7 @@ namespace {
                                 AtomicOrdering MemOpOrder,
                                 function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
                                 CreateCmpXchgInstFun CreateCmpXchg);
+    bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *RMWI);
@@ -260,7 +261,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
                    isAcquireOrStronger(RMWI->getOrdering()))) {
         FenceOrdering = RMWI->getOrdering();
         RMWI->setOrdering(AtomicOrdering::Monotonic);
-      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+      } else if (CASI &&
+                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+                     TargetLoweringBase::AtomicExpansionKind::None &&
                  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                   isAcquireOrStronger(CASI->getSuccessOrdering()))) {
         // If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -334,16 +337,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
         MadeChange = true;
       }
 
-      unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
-      unsigned ValueSize = getAtomicOpSize(CASI);
-      if (ValueSize < MinCASSize) {
-        assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
-               "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
-        expandPartwordCmpXchg(CASI);
-      } else {
-        if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
-          MadeChange |= expandAtomicCmpXchg(CASI);
-      }
+      MadeChange |= tryExpandAtomicCmpXchg(CASI);
     }
   }
   return MadeChange;
@@ -1355,6 +1349,28 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
   return NewLoaded;
 }
 
+bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+  unsigned ValueSize = getAtomicOpSize(CI);
+
+  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
+  default:
+    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    if (ValueSize < MinCASSize)
+      expandPartwordCmpXchg(CI);
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+    assert(ValueSize >= MinCASSize &&
+           "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+    return expandAtomicCmpXchg(CI);
+  }
+  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
+    llvm_unreachable(
+        "MaskedIntrinsic expansion of cmpxchg not yet implemented");
+  }
+}
+
 // Note: This function is exposed externally by AtomicExpandUtils.h
 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                     CreateCmpXchgInstFun CreateCmpXchg) {

lib/Target/AArch64/AArch64ISelLowering.cpp

@@ -11377,16 +11377,20 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
 }
 
-bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   // If subtarget has LSE, leave cmpxchg intact for codegen.
-  if (Subtarget->hasLSE()) return false;
+  if (Subtarget->hasLSE())
+    return AtomicExpansionKind::None;
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
-  return getTargetMachine().getOptLevel() != 0;
+  if (getTargetMachine().getOptLevel() == 0)
+    return AtomicExpansionKind::None;
+  return AtomicExpansionKind::LLSC;
 }
 
 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,

lib/Target/AArch64/AArch64ISelLowering.h

@@ -390,7 +390,8 @@ public:
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
-  bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
 
   TargetLoweringBase::LegalizeTypeAction
TargetLoweringBase::LegalizeTypeAction

lib/Target/ARM/ARMISelLowering.cpp

@@ -14561,16 +14561,18 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
              : AtomicExpansionKind::None;
 }
 
-bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
-    AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
-  bool hasAtomicCmpXchg =
+  bool HasAtomicCmpXchg =
       !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
+  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+    return AtomicExpansionKind::LLSC;
+  return AtomicExpansionKind::None;
 }
 
 bool ARMTargetLowering::shouldInsertFencesForAtomic(

lib/Target/ARM/ARMISelLowering.h

@@ -538,7 +538,8 @@ class VectorType;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
-    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
 
     bool useLoadStackGuardNode() const override;

lib/Target/Hexagon/HexagonISelLowering.cpp

@@ -3214,9 +3214,12 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
 }
 
-bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
-    AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
   const DataLayout &DL = AI->getModule()->getDataLayout();
   unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
-  return Size >= 4 && Size <= 8;
+  if (Size >= 4 && Size <= 8)
+    return AtomicExpansionKind::LLSC;
+  return AtomicExpansionKind::None;
 }

lib/Target/Hexagon/HexagonISelLowering.h

@@ -311,7 +311,8 @@ namespace HexagonISD {
                                   Value *Addr, AtomicOrdering Ord) const override;
     AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+    AtomicExpansionKind
+    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
     AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {