mirror of https://github.com/RPCSX/llvm.git · synced 2025-02-25 07:31:32 +00:00
AArch64: expand cmpxchg after regalloc at -O0.
FastRegAlloc works only at the basic-block level and spills all live-out
registers. Unfortunately for a stack-based cmpxchg near the spill slots, this
can perpetually clear the exclusive monitor, which means the cmpxchg will
never succeed.

I believe the only way to handle this within LLVM is by expanding the loop
post-regalloc. We don't want this in general because it severely limits the
optimisations that can be done, so we limit this to -O0 compilations.

It's an ugly hack, and about the one good point in the whole mess is that we
can treat all cmpxchg operations in the most naive way possible (seq_cst, no
clrex faff) without affecting correctness.

Should fix PR25526.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266339 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: parent a3b03dccc4 · commit a54275fdf6
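For context, the failure mode is easy to hit from plain C++. The snippet below
is a hypothetical reproducer in the spirit of PR25526 (names and values are
illustrative, not taken from the bug report): at -O0 the atomic lives on the
stack next to the allocator's spill slots, so a spill store between the
load-exclusive and the store-exclusive can clear the monitor on every
iteration.

// Hypothetical reproducer: before this patch, an -O0 build could spin
// forever because the store-exclusive never succeeds.
#include <atomic>

int main() {
  std::atomic<int> val(0); // stack-allocated, adjacent to spill slots
  int expected = 0;
  while (!val.compare_exchange_strong(expected, 42))
    expected = 0; // reset and retry
  return val.load() == 42 ? 0 : 1;
}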
@@ -17,6 +17,7 @@
 #include "MCTargetDesc/AArch64AddressingModes.h"
 #include "AArch64InstrInfo.h"
 #include "AArch64Subtarget.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/Support/MathExtras.h"
@@ -46,9 +47,18 @@ public:

 private:
   bool expandMBB(MachineBasicBlock &MBB);
-  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
+  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                MachineBasicBlock::iterator &NextMBBI);
   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                     unsigned BitSize);
+
+  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
+                      unsigned ExtendImm, unsigned ZeroReg,
+                      MachineBasicBlock::iterator &NextMBBI);
+  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
+                          MachineBasicBlock::iterator MBBI,
+                          MachineBasicBlock::iterator &NextMBBI);
 };
 char AArch64ExpandPseudo::ID = 0;
 }
@@ -579,10 +589,176 @@ bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
   return true;
 }

+void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
+  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+    MBB->addLiveIn(*I);
+}
+
+bool AArch64ExpandPseudo::expandCMP_SWAP(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
+    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
+    MachineBasicBlock::iterator &NextMBBI) {
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &Dest = MI.getOperand(0);
+  unsigned StatusReg = MI.getOperand(1).getReg();
+  MachineOperand &Addr = MI.getOperand(2);
+  MachineOperand &Desired = MI.getOperand(3);
+  MachineOperand &New = MI.getOperand(4);
+
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(&MBB);
+  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadCmpBB);
+  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+  MF->insert(++StoreBB->getIterator(), DoneBB);
+
+  // .Lloadcmp:
+  //     ldaxr xDest, [xAddr]
+  //     cmp xDest, xDesired
+  //     b.ne .Ldone
+  MBB.addSuccessor(LoadCmpBB);
+  LoadCmpBB->addLiveIn(Addr.getReg());
+  LoadCmpBB->addLiveIn(Dest.getReg());
+  LoadCmpBB->addLiveIn(Desired.getReg());
+  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+  BuildMI(LoadCmpBB, DL, TII->get(LdarOp), Dest.getReg())
+      .addReg(Addr.getReg());
+  BuildMI(LoadCmpBB, DL, TII->get(CmpOp), ZeroReg)
+      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+      .addOperand(Desired)
+      .addImm(ExtendImm);
+  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
+      .addImm(AArch64CC::NE)
+      .addMBB(DoneBB)
+      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
+  LoadCmpBB->addSuccessor(DoneBB);
+  LoadCmpBB->addSuccessor(StoreBB);
+
+  // .Lstore:
+  //     stlxr wStatus, xNew, [xAddr]
+  //     cbnz wStatus, .Lloadcmp
+  StoreBB->addLiveIn(Addr.getReg());
+  StoreBB->addLiveIn(New.getReg());
+  addPostLoopLiveIns(StoreBB, LiveRegs);
+
+  BuildMI(StoreBB, DL, TII->get(StlrOp), StatusReg)
+      .addOperand(New)
+      .addOperand(Addr);
+  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
+      .addReg(StatusReg, RegState::Kill)
+      .addMBB(LoadCmpBB);
+  StoreBB->addSuccessor(LoadCmpBB);
+  StoreBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+  addPostLoopLiveIns(DoneBB, LiveRegs);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
+bool AArch64ExpandPseudo::expandCMP_SWAP_128(
+    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
+    MachineBasicBlock::iterator &NextMBBI) {
+
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &DestLo = MI.getOperand(0);
+  MachineOperand &DestHi = MI.getOperand(1);
+  unsigned StatusReg = MI.getOperand(2).getReg();
+  MachineOperand &Addr = MI.getOperand(3);
+  MachineOperand &DesiredLo = MI.getOperand(4);
+  MachineOperand &DesiredHi = MI.getOperand(5);
+  MachineOperand &NewLo = MI.getOperand(6);
+  MachineOperand &NewHi = MI.getOperand(7);
+
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(&MBB);
+  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadCmpBB);
+  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+  MF->insert(++StoreBB->getIterator(), DoneBB);
+
+  // .Lloadcmp:
+  //     ldaxp xDestLo, xDestHi, [xAddr]
+  //     cmp xDestLo, xDesiredLo
+  //     sbcs xDestHi, xDesiredHi
+  //     b.ne .Ldone
+  MBB.addSuccessor(LoadCmpBB);
+  LoadCmpBB->addLiveIn(Addr.getReg());
+  LoadCmpBB->addLiveIn(DestLo.getReg());
+  LoadCmpBB->addLiveIn(DestHi.getReg());
+  LoadCmpBB->addLiveIn(DesiredLo.getReg());
+  LoadCmpBB->addLiveIn(DesiredHi.getReg());
+  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+  BuildMI(LoadCmpBB, DL, TII->get(AArch64::LDAXPX))
+      .addReg(DestLo.getReg(), RegState::Define)
+      .addReg(DestHi.getReg(), RegState::Define)
+      .addReg(Addr.getReg());
+  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SUBSXrs), AArch64::XZR)
+      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
+      .addOperand(DesiredLo)
+      .addImm(0);
+  BuildMI(LoadCmpBB, DL, TII->get(AArch64::SBCSXr), AArch64::XZR)
+      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
+      .addOperand(DesiredHi);
+  BuildMI(LoadCmpBB, DL, TII->get(AArch64::Bcc))
+      .addImm(AArch64CC::NE)
+      .addMBB(DoneBB)
+      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
+  LoadCmpBB->addSuccessor(DoneBB);
+  LoadCmpBB->addSuccessor(StoreBB);
+
+  // .Lstore:
+  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
+  //     cbnz wStatus, .Lloadcmp
+  StoreBB->addLiveIn(Addr.getReg());
+  StoreBB->addLiveIn(NewLo.getReg());
+  StoreBB->addLiveIn(NewHi.getReg());
+  addPostLoopLiveIns(StoreBB, LiveRegs);
+  BuildMI(StoreBB, DL, TII->get(AArch64::STLXPX), StatusReg)
+      .addOperand(NewLo)
+      .addOperand(NewHi)
+      .addOperand(Addr);
+  BuildMI(StoreBB, DL, TII->get(AArch64::CBNZW))
+      .addReg(StatusReg, RegState::Kill)
+      .addMBB(LoadCmpBB);
+  StoreBB->addSuccessor(LoadCmpBB);
+  StoreBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+  addPostLoopLiveIns(DoneBB, LiveRegs);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
 /// \brief If MBBI references a pseudo instruction that should be expanded here,
 /// do the expansion and return true. Otherwise return false.
 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
-                                   MachineBasicBlock::iterator MBBI) {
+                                   MachineBasicBlock::iterator MBBI,
+                                   MachineBasicBlock::iterator &NextMBBI) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
@@ -724,6 +900,28 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
     MI.eraseFromParent();
     return true;
   }
+  case AArch64::CMP_SWAP_8:
+    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
+                          AArch64::SUBSWrx,
+                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
+                          AArch64::WZR, NextMBBI);
+  case AArch64::CMP_SWAP_16:
+    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
+                          AArch64::SUBSWrx,
+                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
+                          AArch64::WZR, NextMBBI);
+  case AArch64::CMP_SWAP_32:
+    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
+                          AArch64::SUBSWrs,
+                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
+                          AArch64::WZR, NextMBBI);
+  case AArch64::CMP_SWAP_64:
+    return expandCMP_SWAP(MBB, MBBI,
+                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
+                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
+                          AArch64::XZR, NextMBBI);
+  case AArch64::CMP_SWAP_128:
+    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
   }
   return false;
 }
@@ -736,7 +934,7 @@ bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-    Modified |= expandMI(MBB, MBBI);
+    Modified |= expandMI(MBB, MBBI, NMBBI);
     MBBI = NMBBI;
   }

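A note on the liveness bookkeeping in expandCMP_SWAP and expandCMP_SWAP_128
above: because the pass runs after register allocation, the freshly created
blocks need explicit live-in lists, so the code starts from MBB's live-outs
and calls LiveRegs.stepBackward over every instruction after the pseudo. The
sketch below is a simplified standalone model of that backward dataflow step
(hypothetical types, not the LLVM API): a def ends a register's liveness, a
use begins it.

#include <cstddef>
#include <set>
#include <vector>

struct Inst {
  std::vector<int> Defs; // registers written
  std::vector<int> Uses; // registers read
};

// Registers live immediately after Block[Pos], given the block's live-outs.
std::set<int> liveAfter(const std::vector<Inst> &Block, std::set<int> Live,
                        std::size_t Pos) {
  // Step backward from the last instruction down to Block[Pos + 1].
  for (std::size_t I = Block.size(); I-- > Pos + 1;) {
    for (int D : Block[I].Defs)
      Live.erase(D); // a def kills liveness above the instruction
    for (int U : Block[I].Uses)
      Live.insert(U); // a use makes the register live above it
  }
  return Live;
}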
@@ -198,6 +198,9 @@ private:
   }

   bool SelectCVTFixedPosOperand(SDValue N, SDValue &FixedPos, unsigned Width);
+
+  void SelectCMP_SWAP(SDNode *N);
+
 };
 } // end anonymous namespace

@@ -2296,6 +2299,36 @@ SDNode *AArch64DAGToDAGISel::SelectWriteRegister(SDNode *N) {
   return nullptr;
 }

+/// We've got special pseudo-instructions for these
+void AArch64DAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+  unsigned Opcode;
+  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+  if (MemTy == MVT::i8)
+    Opcode = AArch64::CMP_SWAP_8;
+  else if (MemTy == MVT::i16)
+    Opcode = AArch64::CMP_SWAP_16;
+  else if (MemTy == MVT::i32)
+    Opcode = AArch64::CMP_SWAP_32;
+  else if (MemTy == MVT::i64)
+    Opcode = AArch64::CMP_SWAP_64;
+  else
+    llvm_unreachable("Unknown AtomicCmpSwap type");
+
+  MVT RegTy = MemTy == MVT::i64 ? MVT::i64 : MVT::i32;
+  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = CurDAG->getMachineNode(
+      Opcode, SDLoc(N),
+      CurDAG->getVTList(RegTy, MVT::i32, MVT::Other), Ops);
+
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
+  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
+}
+
 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(errs() << "Selecting: ");
@@ -2317,6 +2350,10 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   default:
     break;

+  case ISD::ATOMIC_CMP_SWAP:
+    SelectCMP_SWAP(Node);
+    return nullptr;
+
   case ISD::READ_REGISTER:
     if (SDNode *Res = SelectReadRegister(Node))
       return Res;
@@ -412,6 +412,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,

   setOperationAction(ISD::PREFETCH, MVT::Other, Custom);

+  setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i128, Custom);
+
   // Lower READCYCLECOUNTER using an mrs from PMCCNTR_EL0.
   // This requires the Performance Monitors extension.
   if (Subtarget->hasPerfMon())
@@ -10050,6 +10052,31 @@ static void ReplaceReductionResults(SDNode *N,
   Results.push_back(SplitVal);
 }

+static void ReplaceCMP_SWAP_128Results(SDNode *N,
+                                       SmallVectorImpl<SDValue> &Results,
+                                       SelectionDAG &DAG) {
+  assert(N->getValueType(0) == MVT::i128 &&
+         "AtomicCmpSwap on types less than 128 should be legal");
+  SDValue Ops[] = {N->getOperand(1),
+                   N->getOperand(2)->getOperand(0),
+                   N->getOperand(2)->getOperand(1),
+                   N->getOperand(3)->getOperand(0),
+                   N->getOperand(3)->getOperand(1),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = DAG.getMachineNode(
+      AArch64::CMP_SWAP_128, SDLoc(N),
+      DAG.getVTList(MVT::i64, MVT::i64, MVT::i32, MVT::Other), Ops);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  Results.push_back(SDValue(CmpSwap, 0));
+  Results.push_back(SDValue(CmpSwap, 1));
+  Results.push_back(SDValue(CmpSwap, 3));
+}
+
 void AArch64TargetLowering::ReplaceNodeResults(
     SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
   switch (N->getOpcode()) {
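One step in ReplaceCMP_SWAP_128Results above deserves a comment: the nested
getOperand(2)->getOperand(0) / getOperand(1) calls work because, by the time
this hook runs, each illegal i128 operand appears as a pair-building node
whose operands are the two i64 halves (low half first), matching the
$desiredLo/$desiredHi operand order of the CMP_SWAP_128 pseudo. A standalone
model of that split (hypothetical helper, using the GCC/Clang __int128
extension):

#include <cassert>
#include <cstdint>
#include <utility>

using U64Pair = std::pair<uint64_t, uint64_t>; // {low half, high half}

static U64Pair splitU128(unsigned __int128 V) {
  return {static_cast<uint64_t>(V),        // half 0: bits [63:0]
          static_cast<uint64_t>(V >> 64)}; // half 1: bits [127:64]
}

int main() {
  unsigned __int128 V = (static_cast<unsigned __int128>(2) << 64) | 1;
  U64Pair Halves = splitU128(V);
  assert(Halves.first == 1 && Halves.second == 2);
  return 0;
}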
@@ -10081,6 +10108,9 @@ void AArch64TargetLowering::ReplaceNodeResults(
     assert(N->getValueType(0) == MVT::i128 && "unexpected illegal conversion");
     // Let normal code take care of it by not adding anything to Results.
     return;
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceCMP_SWAP_128Results(N, Results, DAG);
+    return;
   }
 }

@@ -10132,7 +10162,12 @@ AArch64TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {

 bool AArch64TargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
-  return true;
+  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+  // implement cmpxchg without spilling. If the address being exchanged is also
+  // on the stack and close enough to the spill slot, this can lead to a
+  // situation where the monitor always gets cleared and the atomic operation
+  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  return getTargetMachine().getOptLevel() != 0;
 }

 Value *AArch64TargetLowering::emitLoadLinked(IRBuilder<> &Builder, Value *Addr,
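The hook above is the IR-side half of the patch: returning true routes
cmpxchg through the generic IR-level expansion into a load-linked/
store-conditional loop well before register allocation, while returning false
keeps the instruction whole so it selects to the CMP_SWAP_* pseudos defined
below. A rough standalone model of the gate (hypothetical enum, not the LLVM
API):

enum class OptLevel { O0 = 0, O1, O2, O3 };

// At -O0 the fast register allocator may spill between the exclusive load
// and store, so the loop must be formed after register allocation instead.
static bool shouldExpandAtomicCmpXchgInIR(OptLevel OL) {
  return OL != OptLevel::O0;
}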
@@ -362,3 +362,43 @@ def : Pat<(stlxr_4 (and GPR64:$val, 0xffffffff), GPR64sp:$addr),
 // And clear exclusive.

 def : Pat<(int_aarch64_clrex), (CLREX 0xf)>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// The fast register allocator used during -O0 inserts spills to cover any VRegs
+// live across basic block boundaries. When this happens between an LDXR and an
+// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to
+// fail.
+
+// Unfortunately, this means we have to have an alternative (expanded
+// post-regalloc) path for -O0 compilations. Fortunately this path can be
+// significantly more naive than the standard expansion: we conservatively
+// assume seq_cst, strong cmpxchg and omit clrex on failure.
+
+let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+    mayLoad = 1, mayStore = 1 in {
+def CMP_SWAP_8 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+                        (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
+                 Sched<[WriteAtomic]>;
+
+def CMP_SWAP_16 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+                         (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
+                  Sched<[WriteAtomic]>;
+
+def CMP_SWAP_32 : Pseudo<(outs GPR32:$Rd, GPR32:$status),
+                         (ins GPR64:$addr, GPR32:$desired, GPR32:$new), []>,
+                  Sched<[WriteAtomic]>;
+
+def CMP_SWAP_64 : Pseudo<(outs GPR64:$Rd, GPR32:$status),
+                         (ins GPR64:$addr, GPR64:$desired, GPR64:$new), []>,
+                  Sched<[WriteAtomic]>;
+}
+
+let Constraints = "@earlyclobber $RdLo,@earlyclobber $RdHi,@earlyclobber $status",
+    mayLoad = 1, mayStore = 1 in
+def CMP_SWAP_128 : Pseudo<(outs GPR64:$RdLo, GPR64:$RdHi, GPR32:$status),
+                          (ins GPR64:$addr, GPR64:$desiredLo, GPR64:$desiredHi,
+                               GPR64:$newLo, GPR64:$newHi), []>,
+                   Sched<[WriteAtomic]>;
test/CodeGen/AArch64/cmpxchg-O0.ll (new file, 75 lines)
@@ -0,0 +1,75 @@
+; RUN: llc -mtriple=aarch64-linux-gnu -O0 %s -o - | FileCheck %s
+
+define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_8:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxrb [[OLD:w[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], w1, uxtb
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxrb [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK: cset {{w[0-9]+}}, eq
+  %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
+  ret { i8, i1 } %res
+}
+
+define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_16:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxrh [[OLD:w[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], w1, uxth
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxrh [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK: cset {{w[0-9]+}}, eq
+  %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
+  ret { i16, i1 } %res
+}
+
+define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_32:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[OLD:w[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], w1
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxr [[STATUS:w[3-9]]], w2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{w[0-9]+}}, [[OLD]], w1
+; CHECK: cset {{w[0-9]+}}, eq
+  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  ret { i32, i1 } %res
+}
+
+define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_64:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxr [[OLD:x[0-9]+]], [x0]
+; CHECK: cmp [[OLD]], x1
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxr [[STATUS:w[3-9]]], x2, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: subs {{x[0-9]+}}, [[OLD]], x1
+; CHECK: cset {{w[0-9]+}}, eq
+  %res = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst monotonic
+  ret { i64, i1 } %res
+}
+
+define { i128, i1 } @test_cmpxchg_128(i128* %addr, i128 %desired, i128 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_128:
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldaxp [[OLD_LO:x[0-9]+]], [[OLD_HI:x[0-9]+]], [x0]
+; CHECK: cmp [[OLD_LO]], x2
+; CHECK: sbcs xzr, [[OLD_HI]], x3
+; CHECK: b.ne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: stlxp [[STATUS:w[0-9]+]], x4, x5, [x0]
+; CHECK: cbnz [[STATUS]], [[RETRY]]
+; CHECK: [[DONE]]:
+  %res = cmpxchg i128* %addr, i128 %desired, i128 %new seq_cst monotonic
+  ret { i128, i1 } %res
+}
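A detail worth noting in the CHECK patterns above: the pseudos return only the
loaded value plus a scratch status register, so the i1 success bit of the
{ iN, i1 } result is recomputed after the loop by comparing the loaded value
against the desired one; that is what the trailing subs/cset eq pair in the
scalar tests matches. A simplified model of that derivation (hypothetical
helper, not LLVM code):

#include <cassert>
#include <cstdint>

struct CmpXchgResult {
  uint32_t Old;  // value returned by the exclusive load
  bool Success;  // derived afterwards, not produced inside the loop
};

static CmpXchgResult deriveResult(uint32_t Loaded, uint32_t Desired) {
  // Corresponds to: subs wX, Loaded, Desired ; cset wY, eq
  return {Loaded, Loaded == Desired};
}

int main() {
  assert(deriveResult(7, 7).Success);
  assert(!deriveResult(7, 8).Success);
  return 0;
}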