
[AtomicExpandPass]: Add a hook for custom cmpxchg expansion in IR

This involves changing the shouldExpandAtomicCmpXchgInIR interface, but I have 
updated the in-tree backends using this hook (ARM, AArch64, Hexagon) so they 
will see no functional change. Previously this hook returned bool, but it now 
returns AtomicExpansionKind.

This hook allows targets to select how a given cmpxchg is to be expanded. 
D48131 uses this to expand part-word cmpxchg to a target-specific intrinsic.
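A minimal sketch of how a target might pick among the expansion kinds with the new return type (the class name and the 32-bit threshold are hypothetical, not part of this patch; the MaskedIntrinsic path only becomes usable once the D48131 follow-up lands):

  TargetLowering::AtomicExpansionKind
  MyTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *CI) const {
    unsigned Size =
        CI->getCompareOperand()->getType()->getPrimitiveSizeInBits();
    // Part-word cmpxchg: ask AtomicExpandPass to build a masked LL/SC loop via
    // the target's emitMaskedAtomicCmpXchgIntrinsic() (wired up in D48131).
    if (Size < 32)
      return AtomicExpansionKind::MaskedIntrinsic;
    // Word-sized and larger operations: expand to a plain LL/SC loop in IR.
    return AtomicExpansionKind::LLSC;
  }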

See my associated RFC for more info on the motivation for this change 
<http://lists.llvm.org/pipermail/llvm-dev/2018-June/123993.html>.

Differential Revision: https://reviews.llvm.org/D48130


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@342550 91177308-0d34-0410-b5e6-96231b3b80d8
Alex Bradbury 2018-09-19 14:51:42 +00:00
parent 80b0618b4c
commit 490f68fb29
8 changed files with 66 additions and 29 deletions

@@ -1574,6 +1574,15 @@ public:
     llvm_unreachable("Masked atomicrmw expansion unimplemented on this target");
   }
 
+  /// Perform a masked cmpxchg using a target-specific intrinsic. This
+  /// represents the core LL/SC loop which will be lowered at a late stage by
+  /// the backend.
+  virtual Value *emitMaskedAtomicCmpXchgIntrinsic(
+      IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
+      Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
+    llvm_unreachable("Masked cmpxchg expansion unimplemented on this target");
+  }
+
   /// Inserts in the IR a target-specific intrinsic specifying a fence.
   /// It is called by AtomicExpandPass before expanding an
   ///   AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad
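A backend that opts in to the MaskedIntrinsic kind would be expected to override this hook and emit a call that it later lowers into the LL/SC loop. A minimal sketch, assuming a hypothetical target and a placeholder callee (a real backend would declare a proper target intrinsic and fetch it with Intrinsic::getDeclaration() rather than use getOrInsertFunction()):

  Value *MyTargetLowering::emitMaskedAtomicCmpXchgIntrinsic(
      IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr,
      Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const {
    Module *M = Builder.GetInsertBlock()->getModule();
    Type *I32Ty = Builder.getInt32Ty();
    // Pass the ordering as an immediate so the late-stage expansion can pick
    // the right acquire/release variant of the LL/SC loop.
    Value *Ordering = Builder.getInt32(static_cast<uint32_t>(Ord));
    // Placeholder callee standing in for a target-specific intrinsic.
    FunctionType *FTy = FunctionType::get(
        I32Ty, {AlignedAddr->getType(), I32Ty, I32Ty, I32Ty, I32Ty},
        /*isVarArg=*/false);
    Value *MaskedCmpXchgFn =
        M->getOrInsertFunction("__mytarget_masked_cmpxchg_i32", FTy);
    // Return the loaded word; the part-word expansion (D48131) extracts the
    // sub-word result from it.
    return Builder.CreateCall(MaskedCmpXchgFn,
                              {AlignedAddr, CmpVal, NewVal, Mask, Ordering});
  }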
@@ -1650,11 +1659,11 @@ public:
     return AtomicExpansionKind::None;
   }
 
-  /// Returns true if the given atomic cmpxchg should be expanded by the
-  /// IR-level AtomicExpand pass into a load-linked/store-conditional sequence
-  /// (through emitLoadLinked() and emitStoreConditional()).
-  virtual bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
-    return false;
+  /// Returns how the given atomic cmpxchg should be expanded by the IR-level
+  /// AtomicExpand pass.
+  virtual AtomicExpansionKind
+  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
+    return AtomicExpansionKind::None;
   }
 
   /// Returns how the IR-level AtomicExpand pass should expand the given

@@ -98,6 +98,7 @@ namespace {
                                 AtomicOrdering MemOpOrder,
                                 function_ref<Value *(IRBuilder<> &, Value *)> PerformOp,
                                 CreateCmpXchgInstFun CreateCmpXchg);
+    bool tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool expandAtomicCmpXchg(AtomicCmpXchgInst *CI);
     bool isIdempotentRMW(AtomicRMWInst *RMWI);
@@ -260,7 +261,9 @@ bool AtomicExpand::runOnFunction(Function &F) {
                    isAcquireOrStronger(RMWI->getOrdering()))) {
         FenceOrdering = RMWI->getOrdering();
         RMWI->setOrdering(AtomicOrdering::Monotonic);
-      } else if (CASI && !TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
+      } else if (CASI &&
+                 TLI->shouldExpandAtomicCmpXchgInIR(CASI) ==
+                     TargetLoweringBase::AtomicExpansionKind::None &&
                  (isReleaseOrStronger(CASI->getSuccessOrdering()) ||
                   isAcquireOrStronger(CASI->getSuccessOrdering()))) {
         // If a compare and swap is lowered to LL/SC, we can do smarter fence
@@ -334,16 +337,7 @@ bool AtomicExpand::runOnFunction(Function &F) {
         MadeChange = true;
       }
 
-      unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
-      unsigned ValueSize = getAtomicOpSize(CASI);
-      if (ValueSize < MinCASSize) {
-        assert(!TLI->shouldExpandAtomicCmpXchgInIR(CASI) &&
-               "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
-        expandPartwordCmpXchg(CASI);
-      } else {
-        if (TLI->shouldExpandAtomicCmpXchgInIR(CASI))
-          MadeChange |= expandAtomicCmpXchg(CASI);
-      }
+      MadeChange |= tryExpandAtomicCmpXchg(CASI);
     }
   }
   return MadeChange;
@@ -1355,6 +1349,28 @@ Value *AtomicExpand::insertRMWCmpXchgLoop(
   return NewLoaded;
 }
 
+bool AtomicExpand::tryExpandAtomicCmpXchg(AtomicCmpXchgInst *CI) {
+  unsigned MinCASSize = TLI->getMinCmpXchgSizeInBits() / 8;
+  unsigned ValueSize = getAtomicOpSize(CI);
+
+  switch (TLI->shouldExpandAtomicCmpXchgInIR(CI)) {
+  default:
+    llvm_unreachable("Unhandled case in tryExpandAtomicCmpXchg");
+  case TargetLoweringBase::AtomicExpansionKind::None:
+    if (ValueSize < MinCASSize)
+      expandPartwordCmpXchg(CI);
+    return false;
+  case TargetLoweringBase::AtomicExpansionKind::LLSC: {
+    assert(ValueSize >= MinCASSize &&
+           "MinCmpXchgSizeInBits not yet supported for LL/SC expansions.");
+    return expandAtomicCmpXchg(CI);
+  }
+  case TargetLoweringBase::AtomicExpansionKind::MaskedIntrinsic:
+    llvm_unreachable(
+        "MaskedIntrinsic expansion of cmpxhg not yet implemented");
+  }
+}
+
 // Note: This function is exposed externally by AtomicExpandUtils.h
 bool llvm::expandAtomicRMWToCmpXchg(AtomicRMWInst *AI,
                                     CreateCmpXchgInstFun CreateCmpXchg) {

@@ -11377,16 +11377,20 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
   return (Subtarget->hasLSE() && Size < 128) ? AtomicExpansionKind::None : AtomicExpansionKind::LLSC;
 }
 
-bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
+TargetLowering::AtomicExpansionKind
+AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
   // If subtarget has LSE, leave cmpxchg intact for codegen.
-  if (Subtarget->hasLSE()) return false;
+  if (Subtarget->hasLSE())
+    return AtomicExpansionKind::None;
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
-  return getTargetMachine().getOptLevel() != 0;
+  if (getTargetMachine().getOptLevel() == 0)
+    return AtomicExpansionKind::None;
+  return AtomicExpansionKind::LLSC;
 }
 
 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,

@@ -390,7 +390,8 @@ public:
   TargetLoweringBase::AtomicExpansionKind
   shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
-  bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+  TargetLoweringBase::AtomicExpansionKind
+  shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
 
   bool useLoadStackGuardNode() const override;
 
   TargetLoweringBase::LegalizeTypeAction

@@ -14561,16 +14561,18 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
                                  : AtomicExpansionKind::None;
 }
 
-bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
-    AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const {
   // At -O0, fast-regalloc cannot cope with the live vregs necessary to
   // implement cmpxchg without spilling. If the address being exchanged is also
   // on the stack and close enough to the spill slot, this can lead to a
   // situation where the monitor always gets cleared and the atomic operation
   // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
-  bool hasAtomicCmpXchg =
+  bool HasAtomicCmpXchg =
       !Subtarget->isThumb() || Subtarget->hasV8MBaselineOps();
-  return getTargetMachine().getOptLevel() != 0 && hasAtomicCmpXchg;
+  if (getTargetMachine().getOptLevel() != 0 && HasAtomicCmpXchg)
+    return AtomicExpansionKind::LLSC;
+  return AtomicExpansionKind::None;
 }
 
 bool ARMTargetLowering::shouldInsertFencesForAtomic(

@@ -538,7 +538,8 @@ class VectorType;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
     TargetLoweringBase::AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override;
-    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+    TargetLoweringBase::AtomicExpansionKind
+    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
 
     bool useLoadStackGuardNode() const override;

@@ -3214,9 +3214,12 @@ bool HexagonTargetLowering::shouldExpandAtomicStoreInIR(StoreInst *SI) const {
   return SI->getValueOperand()->getType()->getPrimitiveSizeInBits() > 64;
 }
 
-bool HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
-    AtomicCmpXchgInst *AI) const {
+TargetLowering::AtomicExpansionKind
+HexagonTargetLowering::shouldExpandAtomicCmpXchgInIR(
+    AtomicCmpXchgInst *AI) const {
   const DataLayout &DL = AI->getModule()->getDataLayout();
   unsigned Size = DL.getTypeStoreSize(AI->getCompareOperand()->getType());
-  return Size >= 4 && Size <= 8;
+  if (Size >= 4 && Size <= 8)
+    return AtomicExpansionKind::LLSC;
+  return AtomicExpansionKind::None;
 }

@@ -311,7 +311,8 @@ namespace HexagonISD {
                                  Value *Addr, AtomicOrdering Ord) const override;
     AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const override;
     bool shouldExpandAtomicStoreInIR(StoreInst *SI) const override;
-    bool shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
+    AtomicExpansionKind
+    shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const override;
     AtomicExpansionKind
     shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override {