AArch64: remove barriers from AArch64 atomic operations.

I've managed to convince myself that AArch64's acquire/release
instructions are sufficient to guarantee C++11's required semantics,
even in the sequentially-consistent case.

llvm-svn: 179005
Tim Northover 2013-04-08 08:40:41 +00:00
parent 822ccb1509
commit 8eb5637d73
5 changed files with 395 additions and 320 deletions
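The heart of the change is the new getExclusiveOperation helper further down: instead of bracketing every atomic operation with DMB barriers, the lowering now picks load-acquire/store-release exclusive instructions (LDAXR/STLXR) directly from the C++11 ordering. A minimal standalone sketch of that selection rule follows; the enum, opcode names and helper are illustrative stand-ins, not LLVM's definitions.

#include <cassert>

// Illustrative stand-ins for LLVM's AtomicOrdering and the AArch64 exclusive
// load/store opcodes; only the selection logic mirrors the patch.
enum AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease,
                      SequentiallyConsistent };
enum Opcode { LDXR, LDAXR, STXR, STLXR };

// Pick the exclusive load/store flavours for an atomic RMW of the given
// ordering: acquire-or-stronger orderings use LDAXR, release-or-stronger
// orderings use STLXR, so seq_cst becomes LDAXR + STLXR with no DMB.
static void selectExclusivePair(AtomicOrdering Ord, Opcode &Load, Opcode &Store) {
  bool NeedsAcquire =
      Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent;
  bool NeedsRelease =
      Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent;
  Load = NeedsAcquire ? LDAXR : LDXR;
  Store = NeedsRelease ? STLXR : STXR;
}

int main() {
  Opcode Load, Store;
  selectExclusivePair(SequentiallyConsistent, Load, Store);
  assert(Load == LDAXR && Store == STLXR); // seq_cst: no separate barrier
  selectExclusivePair(Monotonic, Load, Store);
  assert(Load == LDXR && Store == STXR);   // relaxed: plain exclusives
  return 0;
}

Because a sequentially consistent operation takes both the acquire and the release path, it needs no explicit fence, which is what lets the barrier-recombining DAG combines below be deleted.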

@@ -88,6 +88,8 @@ public:
   bool SelectTSTBOperand(SDValue N, SDValue &FixedPos, unsigned RegWidth);
+  SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32,
+                       unsigned Op64);
   SDNode *TrySelectToMoveImm(SDNode *N);
   SDNode *LowerToFPLitPool(SDNode *Node);
   SDNode *SelectToLitPool(SDNode *N);
@@ -318,6 +320,38 @@ AArch64DAGToDAGISel::SelectTSTBOperand(SDValue N, SDValue &FixedPos,
   return true;
 }
 
+SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8,
+                                          unsigned Op16, unsigned Op32,
+                                          unsigned Op64) {
+  // Mostly direct translation to the given operations, except that we preserve
+  // the AtomicOrdering for use later on.
+  AtomicSDNode *AN = cast<AtomicSDNode>(Node);
+  EVT VT = AN->getMemoryVT();
+
+  unsigned Op;
+  if (VT == MVT::i8)
+    Op = Op8;
+  else if (VT == MVT::i16)
+    Op = Op16;
+  else if (VT == MVT::i32)
+    Op = Op32;
+  else if (VT == MVT::i64)
+    Op = Op64;
+  else
+    llvm_unreachable("Unexpected atomic operation");
+
+  SmallVector<SDValue, 4> Ops;
+  for (unsigned i = 1; i < AN->getNumOperands(); ++i)
+    Ops.push_back(AN->getOperand(i));
+
+  Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32));
+  Ops.push_back(AN->getOperand(0)); // Chain moves to the end
+
+  return CurDAG->SelectNodeTo(Node, Op,
+                              AN->getValueType(0), MVT::Other,
+                              &Ops[0], Ops.size());
+}
+
 SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   // Dump information about the Node being selected
   DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n");
@@ -328,6 +362,78 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) {
   }
 
   switch (Node->getOpcode()) {
+  case ISD::ATOMIC_LOAD_ADD:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_ADD_I8,
+                        AArch64::ATOMIC_LOAD_ADD_I16,
+                        AArch64::ATOMIC_LOAD_ADD_I32,
+                        AArch64::ATOMIC_LOAD_ADD_I64);
+  case ISD::ATOMIC_LOAD_SUB:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_SUB_I8,
+                        AArch64::ATOMIC_LOAD_SUB_I16,
+                        AArch64::ATOMIC_LOAD_SUB_I32,
+                        AArch64::ATOMIC_LOAD_SUB_I64);
+  case ISD::ATOMIC_LOAD_AND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_AND_I8,
+                        AArch64::ATOMIC_LOAD_AND_I16,
+                        AArch64::ATOMIC_LOAD_AND_I32,
+                        AArch64::ATOMIC_LOAD_AND_I64);
+  case ISD::ATOMIC_LOAD_OR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_OR_I8,
+                        AArch64::ATOMIC_LOAD_OR_I16,
+                        AArch64::ATOMIC_LOAD_OR_I32,
+                        AArch64::ATOMIC_LOAD_OR_I64);
+  case ISD::ATOMIC_LOAD_XOR:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_XOR_I8,
+                        AArch64::ATOMIC_LOAD_XOR_I16,
+                        AArch64::ATOMIC_LOAD_XOR_I32,
+                        AArch64::ATOMIC_LOAD_XOR_I64);
+  case ISD::ATOMIC_LOAD_NAND:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_NAND_I8,
+                        AArch64::ATOMIC_LOAD_NAND_I16,
+                        AArch64::ATOMIC_LOAD_NAND_I32,
+                        AArch64::ATOMIC_LOAD_NAND_I64);
+  case ISD::ATOMIC_LOAD_MIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MIN_I8,
+                        AArch64::ATOMIC_LOAD_MIN_I16,
+                        AArch64::ATOMIC_LOAD_MIN_I32,
+                        AArch64::ATOMIC_LOAD_MIN_I64);
+  case ISD::ATOMIC_LOAD_MAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_MAX_I8,
+                        AArch64::ATOMIC_LOAD_MAX_I16,
+                        AArch64::ATOMIC_LOAD_MAX_I32,
+                        AArch64::ATOMIC_LOAD_MAX_I64);
+  case ISD::ATOMIC_LOAD_UMIN:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMIN_I8,
+                        AArch64::ATOMIC_LOAD_UMIN_I16,
+                        AArch64::ATOMIC_LOAD_UMIN_I32,
+                        AArch64::ATOMIC_LOAD_UMIN_I64);
+  case ISD::ATOMIC_LOAD_UMAX:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_LOAD_UMAX_I8,
+                        AArch64::ATOMIC_LOAD_UMAX_I16,
+                        AArch64::ATOMIC_LOAD_UMAX_I32,
+                        AArch64::ATOMIC_LOAD_UMAX_I64);
+  case ISD::ATOMIC_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_SWAP_I8,
+                        AArch64::ATOMIC_SWAP_I16,
+                        AArch64::ATOMIC_SWAP_I32,
+                        AArch64::ATOMIC_SWAP_I64);
+  case ISD::ATOMIC_CMP_SWAP:
+    return SelectAtomic(Node,
+                        AArch64::ATOMIC_CMP_SWAP_I8,
+                        AArch64::ATOMIC_CMP_SWAP_I16,
+                        AArch64::ATOMIC_CMP_SWAP_I32,
+                        AArch64::ATOMIC_CMP_SWAP_I64);
   case ISD::FrameIndex: {
     int FI = cast<FrameIndexSDNode>(Node)->getIndex();
     EVT PtrTy = TLI.getPointerTy();

@@ -59,12 +59,9 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM)
   computeRegisterProperties();
 
-  // Some atomic operations can be folded into load-acquire or store-release
-  // instructions on AArch64. It's marginally simpler to let LLVM expand
-  // everything out to a barrier and then recombine the (few) barriers we can.
-  setInsertFencesForAtomic(true);
-  setTargetDAGCombine(ISD::ATOMIC_FENCE);
-  setTargetDAGCombine(ISD::ATOMIC_STORE);
+  // We have particularly efficient implementations of atomic fences if they can
+  // be combined with nearby atomic loads and stores.
+  setShouldFoldAtomicFences(true);
 
   // We combine OR nodes for bitfield and NEON BSL operations.
   setTargetDAGCombine(ISD::OR);
@@ -275,27 +272,34 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const {
   return VT.changeVectorElementTypeToInteger();
 }
 
-static void getExclusiveOperation(unsigned Size, unsigned &ldrOpc,
-                                  unsigned &strOpc) {
-  switch (Size) {
-  default: llvm_unreachable("unsupported size for atomic binary op!");
-  case 1:
-    ldrOpc = AArch64::LDXR_byte;
-    strOpc = AArch64::STXR_byte;
-    break;
-  case 2:
-    ldrOpc = AArch64::LDXR_hword;
-    strOpc = AArch64::STXR_hword;
-    break;
-  case 4:
-    ldrOpc = AArch64::LDXR_word;
-    strOpc = AArch64::STXR_word;
-    break;
-  case 8:
-    ldrOpc = AArch64::LDXR_dword;
-    strOpc = AArch64::STXR_dword;
-    break;
-  }
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  unsigned &LdrOpc,
+                                  unsigned &StrOpc) {
+  static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword,
+                                 AArch64::LDXR_word, AArch64::LDXR_dword};
+  static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword,
+                                AArch64::LDAXR_word, AArch64::LDAXR_dword};
+  static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword,
+                                  AArch64::STXR_word, AArch64::STXR_dword};
+  static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword,
+                                 AArch64::STLXR_word, AArch64::STLXR_dword};
+
+  unsigned *LoadOps, *StoreOps;
+  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    LoadOps = LoadAcqs;
+  else
+    LoadOps = LoadBares;
+
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    StoreOps = StoreRels;
+  else
+    StoreOps = StoreBares;
+
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+
+  LdrOpc = LoadOps[Log2_32(Size)];
+  StrOpc = StoreOps[Log2_32(Size)];
 }
 
 MachineBasicBlock *
@@ -313,12 +317,13 @@ AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
   DebugLoc dl = MI->getDebugLoc();
 
   MachineRegisterInfo &MRI = BB->getParent()->getRegInfo();
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -397,6 +402,8 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   unsigned dest = MI->getOperand(0).getReg();
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned incr = MI->getOperand(2).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(3).getImm());
   unsigned oldval = dest;
   DebugLoc dl = MI->getDebugLoc();
 
@@ -411,7 +418,7 @@ AArch64TargetLowering::emitAtomicBinaryMinMax(MachineInstr *MI,
   }
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
@@ -479,6 +486,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   unsigned ptr = MI->getOperand(1).getReg();
   unsigned oldval = MI->getOperand(2).getReg();
   unsigned newval = MI->getOperand(3).getReg();
+  AtomicOrdering Ord = static_cast<AtomicOrdering>(MI->getOperand(4).getImm());
   const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
   DebugLoc dl = MI->getDebugLoc();
 
@@ -487,7 +495,7 @@ AArch64TargetLowering::emitAtomicCmpSwap(MachineInstr *MI,
   TRCsp = Size == 8 ? &AArch64::GPR64xspRegClass : &AArch64::GPR32wspRegClass;
 
   unsigned ldrOpc, strOpc;
-  getExclusiveOperation(Size, ldrOpc, strOpc);
+  getExclusiveOperation(Size, Ord, ldrOpc, strOpc);
 
   MachineFunction *MF = BB->getParent();
   const BasicBlock *LLVM_BB = BB->getBasicBlock();
@@ -2377,78 +2385,6 @@ static SDValue PerformANDCombine(SDNode *N,
                        DAG.getConstant(LSB + Width - 1, MVT::i64));
 }
 
-static SDValue PerformATOMIC_FENCECombine(SDNode *FenceNode,
-                                          TargetLowering::DAGCombinerInfo &DCI) {
-  // An atomic operation followed by an acquiring atomic fence can be reduced to
-  // an acquiring load. The atomic operation provides a convenient pointer to
-  // load from. If the original operation was a load anyway we can actually
-  // combine the two operations into an acquiring load.
-  SelectionDAG &DAG = DCI.DAG;
-  SDValue AtomicOp = FenceNode->getOperand(0);
-  AtomicSDNode *AtomicNode = dyn_cast<AtomicSDNode>(AtomicOp);
-
-  // A fence on its own can't be optimised
-  if (!AtomicNode)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceNode->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceNode->getConstantOperandVal(2));
-
-  if (FenceOrder != Acquire || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  // If the original operation was an ATOMIC_LOAD then we'll be replacing it, so
-  // the chain we use should be its input, otherwise we'll put our store after
-  // it so we use its output chain.
-  SDValue Chain = AtomicNode->getOpcode() == ISD::ATOMIC_LOAD ?
-    AtomicNode->getChain() : AtomicOp;
-
-  // We have an acquire fence with a handy atomic operation nearby, we can
-  // convert the fence into a load-acquire, discarding the result.
-  DebugLoc DL = FenceNode->getDebugLoc();
-  SDValue Op = DAG.getAtomic(ISD::ATOMIC_LOAD, DL, AtomicNode->getMemoryVT(),
-                             AtomicNode->getValueType(0),
-                             Chain,                  // Chain
-                             AtomicOp.getOperand(1), // Pointer
-                             AtomicNode->getMemOperand(), Acquire,
-                             FenceScope);
-
-  if (AtomicNode->getOpcode() == ISD::ATOMIC_LOAD)
-    DAG.ReplaceAllUsesWith(AtomicNode, Op.getNode());
-
-  return Op.getValue(1);
-}
-
-static SDValue PerformATOMIC_STORECombine(SDNode *N,
-                                          TargetLowering::DAGCombinerInfo &DCI) {
-  // A releasing atomic fence followed by an atomic store can be combined into a
-  // single store operation.
-  SelectionDAG &DAG = DCI.DAG;
-  AtomicSDNode *AtomicNode = cast<AtomicSDNode>(N);
-  SDValue FenceOp = AtomicNode->getOperand(0);
-
-  if (FenceOp.getOpcode() != ISD::ATOMIC_FENCE)
-    return SDValue();
-
-  AtomicOrdering FenceOrder
-    = static_cast<AtomicOrdering>(FenceOp->getConstantOperandVal(1));
-  SynchronizationScope FenceScope
-    = static_cast<SynchronizationScope>(FenceOp->getConstantOperandVal(2));
-
-  if (FenceOrder != Release || FenceScope != AtomicNode->getSynchScope())
-    return SDValue();
-
-  DebugLoc DL = AtomicNode->getDebugLoc();
-  return DAG.getAtomic(ISD::ATOMIC_STORE, DL, AtomicNode->getMemoryVT(),
-                       FenceOp.getOperand(0),     // Chain
-                       AtomicNode->getOperand(1), // Pointer
-                       AtomicNode->getOperand(2), // Value
-                       AtomicNode->getMemOperand(), Release,
-                       FenceScope);
-}
-
 /// For a true bitfield insert, the bits getting into that contiguous mask
 /// should come from the low part of an existing value: they must be formed from
 /// a compatible SHL operation (unless they're already low). This function
@@ -2804,8 +2740,6 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::AND: return PerformANDCombine(N, DCI);
-  case ISD::ATOMIC_FENCE: return PerformATOMIC_FENCECombine(N, DCI);
-  case ISD::ATOMIC_STORE: return PerformATOMIC_STORECombine(N, DCI);
   case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
   case ISD::SRA: return PerformSRACombine(N, DCI);
   }

@@ -159,49 +159,55 @@ let Defs = [XSP], Uses = [XSP] in {
 // Atomic operation pseudo-instructions
 //===----------------------------------------------------------------------===//
 
-let usesCustomInserter = 1 in {
-multiclass AtomicSizes<string opname> {
-  def _I8 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-    [(set i32:$dst, (!cast<SDNode>(opname # "_8") i64:$ptr, i32:$incr))]>;
-  def _I16 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-    [(set i32:$dst, (!cast<SDNode>(opname # "_16") i64:$ptr, i32:$incr))]>;
-  def _I32 : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$incr),
-    [(set i32:$dst, (!cast<SDNode>(opname # "_32") i64:$ptr, i32:$incr))]>;
-  def _I64 : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$incr),
-    [(set i64:$dst, (!cast<SDNode>(opname # "_64") i64:$ptr, i64:$incr))]>;
+// These get selected from C++ code as a pretty much direct translation from the
+// generic DAG nodes. The one exception is the AtomicOrdering is added as an
+// operand so that the eventual lowering can make use of it and choose
+// acquire/release operations when required.
+
+let usesCustomInserter = 1, hasCtrlDep = 1, mayLoad = 1, mayStore = 1 in {
+multiclass AtomicSizes {
+  def _I8 : PseudoInst<(outs GPR32:$dst),
+                       (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I16 : PseudoInst<(outs GPR32:$dst),
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I32 : PseudoInst<(outs GPR32:$dst),
+                        (ins GPR64xsp:$ptr, GPR32:$incr, i32imm:$ordering), []>;
+  def _I64 : PseudoInst<(outs GPR64:$dst),
+                        (ins GPR64xsp:$ptr, GPR64:$incr, i32imm:$ordering), []>;
 }
 }
 
-defm ATOMIC_LOAD_ADD  : AtomicSizes<"atomic_load_add">;
-defm ATOMIC_LOAD_SUB  : AtomicSizes<"atomic_load_sub">;
-defm ATOMIC_LOAD_AND  : AtomicSizes<"atomic_load_and">;
-defm ATOMIC_LOAD_OR   : AtomicSizes<"atomic_load_or">;
-defm ATOMIC_LOAD_XOR  : AtomicSizes<"atomic_load_xor">;
-defm ATOMIC_LOAD_NAND : AtomicSizes<"atomic_load_nand">;
-defm ATOMIC_SWAP      : AtomicSizes<"atomic_swap">;
+defm ATOMIC_LOAD_ADD  : AtomicSizes;
+defm ATOMIC_LOAD_SUB  : AtomicSizes;
+defm ATOMIC_LOAD_AND  : AtomicSizes;
+defm ATOMIC_LOAD_OR   : AtomicSizes;
+defm ATOMIC_LOAD_XOR  : AtomicSizes;
+defm ATOMIC_LOAD_NAND : AtomicSizes;
+defm ATOMIC_SWAP      : AtomicSizes;
 let Defs = [NZCV] in {
   // These operations need a CMP to calculate the correct value
-  defm ATOMIC_LOAD_MIN  : AtomicSizes<"atomic_load_min">;
-  defm ATOMIC_LOAD_MAX  : AtomicSizes<"atomic_load_max">;
-  defm ATOMIC_LOAD_UMIN : AtomicSizes<"atomic_load_umin">;
-  defm ATOMIC_LOAD_UMAX : AtomicSizes<"atomic_load_umax">;
+  defm ATOMIC_LOAD_MIN  : AtomicSizes;
+  defm ATOMIC_LOAD_MAX  : AtomicSizes;
+  defm ATOMIC_LOAD_UMIN : AtomicSizes;
+  defm ATOMIC_LOAD_UMAX : AtomicSizes;
 }
 
-let usesCustomInserter = 1, Defs = [NZCV] in {
-def ATOMIC_CMP_SWAP_I8
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set i32:$dst, (atomic_cmp_swap_8 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I16
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set i32:$dst, (atomic_cmp_swap_16 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I32
-  : PseudoInst<(outs GPR32:$dst), (ins GPR64:$ptr, GPR32:$old, GPR32:$new),
-               [(set i32:$dst, (atomic_cmp_swap_32 i64:$ptr, i32:$old, i32:$new))]>;
-def ATOMIC_CMP_SWAP_I64
-  : PseudoInst<(outs GPR64:$dst), (ins GPR64:$ptr, GPR64:$old, GPR64:$new),
-               [(set i64:$dst, (atomic_cmp_swap_64 i64:$ptr, i64:$old, i64:$new))]>;
-}
+class AtomicCmpSwap<RegisterClass GPRData>
+  : PseudoInst<(outs GPRData:$dst),
+               (ins GPR64xsp:$ptr, GPRData:$old, GPRData:$new,
+                    i32imm:$ordering), []> {
+  let usesCustomInserter = 1;
+  let hasCtrlDep = 1;
+  let mayLoad = 1;
+  let mayStore = 1;
+  let Defs = [NZCV];
+}
+
+def ATOMIC_CMP_SWAP_I8  : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I16 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I32 : AtomicCmpSwap<GPR32>;
+def ATOMIC_CMP_SWAP_I64 : AtomicCmpSwap<GPR64>;
 
 //===----------------------------------------------------------------------===//
 // Add-subtract (extended register) instructions
 //===----------------------------------------------------------------------===//
@@ -2579,7 +2585,8 @@ defm LDAR : A64I_LRex<"ldar", 0b101>;
 class acquiring_load<PatFrag base>
   : PatFrag<(ops node:$ptr), (base node:$ptr), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Acquire;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Acquire || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_load_acquire_8 : acquiring_load<atomic_load_8>;
@@ -2610,7 +2617,8 @@ class A64I_SLexs_impl<bits<2> size, bits<3> opcode, string asm, dag outs,
 class releasing_store<PatFrag base>
   : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{
-  return cast<AtomicSDNode>(N)->getOrdering() == Release;
+  AtomicOrdering Ordering = cast<AtomicSDNode>(N)->getOrdering();
+  return Ordering == Release || Ordering == SequentiallyConsistent;
 }]>;
 
 def atomic_store_release_8 : releasing_store<atomic_store_8>;
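The two PatFrag predicates above make the matching call for plain atomic loads and stores: sequentially consistent accesses are now also picked up by the load-acquire (LDAR) and store-release (STLR) patterns. A minimal restatement of those predicates in plain C++, with an illustrative stand-in for the ordering enum rather than LLVM's own definition:

// Stand-in for LLVM's AtomicOrdering; only the two predicates mirror the patch.
enum AtomicOrdering { Monotonic, Acquire, Release, AcquireRelease,
                      SequentiallyConsistent };

// acquiring_load: acquire and seq_cst loads select the LDAR-class patterns.
static bool isAcquiringLoad(AtomicOrdering Ordering) {
  return Ordering == Acquire || Ordering == SequentiallyConsistent;
}

// releasing_store: release and seq_cst stores select the STLR-class patterns.
static bool isReleasingStore(AtomicOrdering Ordering) {
  return Ordering == Release || Ordering == SequentiallyConsistent;
}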

@@ -1,4 +1,4 @@
-; RUN: llc -mtriple=aarch64-none-linux-gnu < %s | FileCheck %s
+; RUN: llc -mtriple=aarch64-none-linux-gnu -verify-machineinstrs < %s | FileCheck %s
 
 define i32 @foo(i32* %var, i1 %cond) {
 ; CHECK: foo:
@@ -9,7 +9,9 @@ simple_ver:
   store i32 %newval, i32* %var
   br label %somewhere
 atomic_ver:
-  %val = atomicrmw add i32* %var, i32 -1 seq_cst
+  fence seq_cst
+  %val = atomicrmw add i32* %var, i32 -1 monotonic
+  fence seq_cst
   br label %somewhere
 ; CHECK: dmb
 ; CHECK: ldxr

File diff suppressed because it is too large.