ARM: use a pseudo-instruction for cmpxchg at -O0.
The fast register-allocator cannot cope with inter-block dependencies without
spilling. This is fine for ldrex/strex loops coming from atomicrmw instructions,
where any value produced within a block is dead by the end, but not for
cmpxchg. So we lower a cmpxchg at -O0 via a pseudo-instruction that gets
expanded after register allocation. Fortunately this is at -O0, so we don't
have to care about performance.

This simplifies the various axes of expansion considerably: we assume a strong
seq_cst operation and ensure ordering via the always-present DMB instructions
rather than v8 acquire/release instructions.

Should fix the 32-bit part of PR25526.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@266679 91177308-0d34-0410-b5e6-96231b3b80d8
parent 37e715dc57
commit 97c0826552
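The change below touches instruction selection (ARMDAGToDAGISel), custom
lowering, the post-RA pseudo expander, and the TableGen pseudo definitions.
For orientation, here is a minimal LLVM IR sketch (illustrative only, not part
of this commit) of an operation that takes the new path: at -O0 it is now
selected to a CMP_SWAP_32 pseudo-instruction and only expanded into an
ldrex/strex loop after register allocation.

; Hypothetical example; the function name and signature are ours, assuming an
; ARMv7-style target (e.g. armv7-linux-gnu) compiled with llc -O0.
define i32 @update(i32* %p, i32 %old, i32 %new) nounwind {
  %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst seq_cst
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}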
@@ -20,6 +20,7 @@
 #include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -63,7 +64,8 @@ namespace {
     void TransferImpOps(MachineInstr &OldMI,
                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     bool ExpandMI(MachineBasicBlock &MBB,
-                  MachineBasicBlock::iterator MBBI);
+                  MachineBasicBlock::iterator MBBI,
+                  MachineBasicBlock::iterator &NextMBBI);
     bool ExpandMBB(MachineBasicBlock &MBB);
     void ExpandVLD(MachineBasicBlock::iterator &MBBI);
     void ExpandVST(MachineBasicBlock::iterator &MBBI);
@@ -72,6 +74,14 @@ namespace {
                     unsigned Opc, bool IsExt);
     void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator &MBBI);
+    bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
+                        unsigned StrexOp, unsigned UxtOp,
+                        MachineBasicBlock::iterator &NextMBBI);
+
+    bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           MachineBasicBlock::iterator &NextMBBI);
   };
   char ARMExpandPseudo::ID = 0;
 }
@@ -742,8 +752,240 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   MI.eraseFromParent();
 }
 
+static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
+  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+    MBB->addLiveIn(*I);
+}
+
+/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
+/// possible. This only gets used at -O0 so we don't care about efficiency of the
+/// generated code.
+bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     unsigned LdrexOp, unsigned StrexOp,
+                                     unsigned UxtOp,
+                                     MachineBasicBlock::iterator &NextMBBI) {
+  bool IsThumb = STI->isThumb();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &Dest = MI.getOperand(0);
+  unsigned StatusReg = MI.getOperand(1).getReg();
+  MachineOperand &Addr = MI.getOperand(2);
+  MachineOperand &Desired = MI.getOperand(3);
+  MachineOperand &New = MI.getOperand(4);
+
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(&MBB);
+  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadCmpBB);
+  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+  MF->insert(++StoreBB->getIterator(), DoneBB);
+
+  if (UxtOp) {
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg())
+            .addReg(Desired.getReg(), RegState::Kill);
+    if (!IsThumb)
+      MIB.addImm(0);
+    AddDefaultPred(MIB);
+  }
+
+  // .Lloadcmp:
+  //     ldrex rDest, [rAddr]
+  //     cmp rDest, rDesired
+  //     bne .Ldone
+  MBB.addSuccessor(LoadCmpBB);
+  LoadCmpBB->addLiveIn(Addr.getReg());
+  LoadCmpBB->addLiveIn(Dest.getReg());
+  LoadCmpBB->addLiveIn(Desired.getReg());
+  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
+  MIB.addReg(Addr.getReg());
+  if (LdrexOp == ARM::t2LDREX)
+    MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
+  AddDefaultPred(MIB);
+
+  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
+  AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+                     .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+                     .addOperand(Desired));
+  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
+  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
+      .addMBB(DoneBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  LoadCmpBB->addSuccessor(DoneBB);
+  LoadCmpBB->addSuccessor(StoreBB);
+
+  // .Lstore:
+  //     strex rStatus, rNew, [rAddr]
+  //     cmp rStatus, #0
+  //     bne .Lloadcmp
+  StoreBB->addLiveIn(Addr.getReg());
+  StoreBB->addLiveIn(New.getReg());
+  addPostLoopLiveIns(StoreBB, LiveRegs);
+
+  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
+  MIB.addOperand(New);
+  MIB.addOperand(Addr);
+  if (StrexOp == ARM::t2STREX)
+    MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
+  AddDefaultPred(MIB);
+
+  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+  AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
+                     .addReg(StatusReg, RegState::Kill)
+                     .addImm(0));
+  BuildMI(StoreBB, DL, TII->get(Bcc))
+      .addMBB(LoadCmpBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  StoreBB->addSuccessor(LoadCmpBB);
+  StoreBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+  addPostLoopLiveIns(DoneBB, LiveRegs);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
+/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
+/// single GPRPair register), Thumb's take two separate registers so we need to
+/// extract the subregs from the pair.
+static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
+                                unsigned Flags, bool IsThumb,
+                                const TargetRegisterInfo *TRI) {
+  if (IsThumb) {
+    unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
+    unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
+    MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
+    MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
+  } else
+    MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
+}
+
+/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
+bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI,
+                                        MachineBasicBlock::iterator &NextMBBI) {
+  bool IsThumb = STI->isThumb();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &Dest = MI.getOperand(0);
+  unsigned StatusReg = MI.getOperand(1).getReg();
+  MachineOperand &Addr = MI.getOperand(2);
+  MachineOperand &Desired = MI.getOperand(3);
+  MachineOperand &New = MI.getOperand(4);
+
+  unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
+  unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
+  unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
+  unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
+
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(&MBB);
+  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadCmpBB);
+  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+  MF->insert(++StoreBB->getIterator(), DoneBB);
+
+  // .Lloadcmp:
+  //     ldrexd rDestLo, rDestHi, [rAddr]
+  //     cmp rDestLo, rDesiredLo
+  //     sbcs rStatus<dead>, rDestHi, rDesiredHi
+  //     bne .Ldone
+  MBB.addSuccessor(LoadCmpBB);
+  LoadCmpBB->addLiveIn(Addr.getReg());
+  LoadCmpBB->addLiveIn(Dest.getReg());
+  LoadCmpBB->addLiveIn(Desired.getReg());
+  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+  unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
+  addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
+  MIB.addReg(Addr.getReg());
+  AddDefaultPred(MIB);
+
+  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
+  AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+                     .addReg(DestLo, getKillRegState(Dest.isDead()))
+                     .addReg(DesiredLo, getKillRegState(Desired.isDead())));
+
+  unsigned SBCrr = IsThumb ? ARM::t2SBCrr : ARM::SBCrr;
+  MIB = BuildMI(LoadCmpBB, DL, TII->get(SBCrr))
+            .addReg(StatusReg, RegState::Define | RegState::Dead)
+            .addReg(DestHi, getKillRegState(Dest.isDead()))
+            .addReg(DesiredHi, getKillRegState(Desired.isDead()));
+  AddDefaultPred(MIB);
+  MIB.addReg(ARM::CPSR, RegState::Kill);
+
+  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
+  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
+      .addMBB(DoneBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  LoadCmpBB->addSuccessor(DoneBB);
+  LoadCmpBB->addSuccessor(StoreBB);
+
+  // .Lstore:
+  //     strexd rStatus, rNewLo, rNewHi, [rAddr]
+  //     cmp rStatus, #0
+  //     bne .Lloadcmp
+  StoreBB->addLiveIn(Addr.getReg());
+  StoreBB->addLiveIn(New.getReg());
+  addPostLoopLiveIns(StoreBB, LiveRegs);
+
+  unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
+  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
+  addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
+  MIB.addOperand(Addr);
+  AddDefaultPred(MIB);
+
+  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+  AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
+                     .addReg(StatusReg, RegState::Kill)
+                     .addImm(0));
+  BuildMI(StoreBB, DL, TII->get(Bcc))
+      .addMBB(LoadCmpBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  StoreBB->addSuccessor(LoadCmpBB);
+  StoreBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+  addPostLoopLiveIns(DoneBB, LiveRegs);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator MBBI) {
+                               MachineBasicBlock::iterator MBBI,
+                               MachineBasicBlock::iterator &NextMBBI) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
@@ -1380,6 +1622,30 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
     case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
     case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+
+    case ARM::CMP_SWAP_8:
+      if (STI->isThumb())
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB,
+                              ARM::tUXTB, NextMBBI);
+      else
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB,
+                              ARM::UXTB, NextMBBI);
+    case ARM::CMP_SWAP_16:
+      if (STI->isThumb())
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH,
+                              ARM::tUXTH, NextMBBI);
+      else
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH,
+                              ARM::UXTH, NextMBBI);
+    case ARM::CMP_SWAP_32:
+      if (STI->isThumb())
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
+                              NextMBBI);
+      else
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
+
+    case ARM::CMP_SWAP_64:
+      return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
   }
 }
@@ -1389,7 +1655,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-    Modified |= ExpandMI(MBB, MBBI);
+    Modified |= ExpandMI(MBB, MBBI, NMBBI);
     MBBI = NMBBI;
   }
 
@@ -253,6 +253,8 @@ private:
 
   SDNode *SelectSMLAWSMULW(SDNode *N);
 
+  SDNode *SelectCMP_SWAP(SDNode *N);
+
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
@@ -2597,6 +2599,34 @@ SDNode *ARMDAGToDAGISel::SelectSMLAWSMULW(SDNode *N) {
   return nullptr;
 }
 
+/// We've got special pseudo-instructions for these
+SDNode *ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+  unsigned Opcode;
+  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+  if (MemTy == MVT::i8)
+    Opcode = ARM::CMP_SWAP_8;
+  else if (MemTy == MVT::i16)
+    Opcode = ARM::CMP_SWAP_16;
+  else if (MemTy == MVT::i32)
+    Opcode = ARM::CMP_SWAP_32;
+  else
+    llvm_unreachable("Unknown AtomicCmpSwap type");
+
+  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = CurDAG->getMachineNode(
+      Opcode, SDLoc(N),
+      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
+
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
+  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
+  return nullptr;
+}
+
 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   // The only time a CONCAT_VECTORS operation can have legal types is when
   // two 64-bit vectors are concatenated to a 128-bit vector.
@@ -3493,6 +3523,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::CONCAT_VECTORS:
     return SelectConcatVector(N);
+
+  case ISD::ATOMIC_CMP_SWAP:
+    return SelectCMP_SWAP(N);
   }
 
   return SelectCode(N);
@@ -850,10 +850,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
   // ATOMIC_FENCE needs custom lowering; the others should have been expanded
   // to ldrex/strex loops already.
   setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+  if (!Subtarget->isThumb() || !Subtarget->isMClass())
+    setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
 
   // On v8, we have particularly efficient implementations of atomic fences
   // if they can be combined with nearby atomic loads and stores.
-  if (!Subtarget->hasV8Ops()) {
+  if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
     // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
     InsertFencesForAtomic = true;
   }
@@ -6969,6 +6971,44 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
   Results.push_back(Cycles32.getValue(1));
 }
 
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V0, SDValue V1) {
+  SDLoc dl(V0.getNode());
+  SDValue RegClass =
+      DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+  return SDValue(
+      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_64Results(SDNode *N,
+                                      SmallVectorImpl<SDValue> &Results,
+                                      SelectionDAG &DAG) {
+  assert(N->getValueType(0) == MVT::i64 &&
+         "AtomicCmpSwap on types less than 64 should be legal");
+  SDValue Ops[] = {N->getOperand(1),
+                   createGPRPairNode(DAG, N->getOperand(2)->getOperand(0),
+                                     N->getOperand(2)->getOperand(1)),
+                   createGPRPairNode(DAG, N->getOperand(3)->getOperand(0),
+                                     N->getOperand(3)->getOperand(1)),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = DAG.getMachineNode(
+      ARM::CMP_SWAP_64, SDLoc(N),
+      DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
+                                               SDValue(CmpSwap, 0)));
+  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
+                                               SDValue(CmpSwap, 0)));
+  Results.push_back(SDValue(CmpSwap, 2));
+}
+
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
@@ -7097,6 +7137,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
     assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
     return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
                              Results);
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceCMP_SWAP_64Results(N, Results, DAG);
+    return;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -12155,7 +12198,12 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
 bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
-  return true;
+  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+  // implement cmpxchg without spilling. If the address being exchanged is also
+  // on the stack and close enough to the spill slot, this can lead to a
+  // situation where the monitor always gets cleared and the atomic operation
+  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  return getTargetMachine().getOptLevel() != 0;
 }
 
 bool ARMTargetLowering::shouldInsertFencesForAtomic(
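To make the comment above concrete, here is a hedged IR sketch (ours, not from
the commit's tests) of the problematic shape: with %addr itself in a stack
slot, a -O0 spill/reload between the ldrex and strex of the old IR-level
expansion could land close to %addr and clear the exclusive monitor on every
iteration, so the cmpxchg never succeeds; the late-expanded pseudo keeps the
loop intact until after register allocation.

; Hypothetical example of the failure mode described above.
define i1 @cmpxchg_stack_addr(i32 %old, i32 %new) nounwind {
  %addr = alloca i32
  store i32 %old, i32* %addr
  %pair = cmpxchg i32* %addr, i32 %old, i32 %new seq_cst monotonic
  %success = extractvalue { i32, i1 } %pair, 1
  ret i1 %success
}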
@@ -5793,3 +5793,36 @@ let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
 def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
                        NoItinerary,
                        [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// The fast register allocator used during -O0 inserts spills to cover any VRegs
+// live across basic block boundaries. When this happens between an LDXR and an
+// STXR it can clear the exclusive monitor, causing all cmpxchg attempts to
+// fail.
+
+// Unfortunately, this means we have to have an alternative (expanded
+// post-regalloc) path for -O0 compilations. Fortunately this path can be
+// significantly more naive than the standard expansion: we conservatively
+// assume seq_cst, strong cmpxchg and omit clrex on failure.
+
+let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+    mayLoad = 1, mayStore = 1 in {
+def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+                            (ins GPR:$addr, GPR:$desired, GPR:$new),
+                            NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+                             (ins GPR:$addr, GPR:$desired, GPR:$new),
+                             NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+                             (ins GPR:$addr, GPR:$desired, GPR:$new),
+                             NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
+                             (ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
+                             NoItinerary, []>, Sched<[]>;
+}
test/CodeGen/ARM/cmpxchg-O0.ll (new file, 81 lines)
@@ -0,0 +1,81 @@
+; RUN: llc -verify-machineinstrs -mtriple=armv7-linux-gnu -O0 %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=thumbv8-linux-gnu -O0 %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi -O0 %s -o - | FileCheck %s --check-prefix=CHECK-T1
+
+; CHECK-T1-NOT: ldrex
+; CHECK-T1-NOT: strex
+
+define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_8:
+; CHECK: dmb ish
+; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexb [[OLD:r[0-9]+]], [r0]
+; CHECK: cmp [[OLD]], [[DESIRED]]
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
+; CHECK: dmb ish
+  %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
+  ret { i8, i1 } %res
+}
+
+define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_16:
+; CHECK: dmb ish
+; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexh [[OLD:r[0-9]+]], [r0]
+; CHECK: cmp [[OLD]], [[DESIRED]]
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
+; CHECK: dmb ish
+  %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
+  ret { i16, i1 } %res
+}
+
+define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_32:
+; CHECK: dmb ish
+; CHECK-NOT: uxt
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrex [[OLD:r[0-9]+]], [r0]
+; CHECK: cmp [[OLD]], [[DESIRED]]
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
+; CHECK: dmb ish
+  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  ret { i32, i1 } %res
+}
+
+define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_64:
+; CHECK: dmb ish
+; CHECK-NOT: uxt
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
+; CHECK: cmp [[OLDLO]], r6
+; CHECK: sbcs{{(\.w)?}} [[STATUS:r[0-9]+]], [[OLDHI]], r7
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strexd [[STATUS]], r4, r5, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: dmb ish
+  %res = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst monotonic
+  ret { i64, i1 } %res
+}
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand -codegen-opt-level=1 %s | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s -codegen-opt-level=1 | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
@@ -1,4 +1,4 @@
-; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+; RUN: opt -atomic-expand -codegen-opt-level=1 -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
 
 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_seq_cst
@@ -136,6 +136,10 @@ static cl::opt<bool>
 OptLevelO3("O3",
            cl::desc("Optimization level 3. Similar to clang -O3"));
 
+static cl::opt<unsigned>
+CodeGenOptLevel("codegen-opt-level",
+                cl::desc("Override optimization level for codegen hooks"));
+
 static cl::opt<std::string>
 TargetTriple("mtriple", cl::desc("Override target triple for module"));
 
@@ -272,6 +276,8 @@ static void AddStandardLinkPasses(legacy::PassManagerBase &PM) {
 //
 
 static CodeGenOpt::Level GetCodeGenOptLevel() {
+  if (CodeGenOptLevel.getNumOccurrences())
+    return static_cast<CodeGenOpt::Level>(unsigned(CodeGenOptLevel));
   if (OptLevelO1)
     return CodeGenOpt::Less;
   if (OptLevelO2)