diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index 992558f0cb0..aefa20ba643 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -20,6 +20,7 @@
 #include "ARMConstantPoolValue.h"
 #include "ARMMachineFunctionInfo.h"
 #include "MCTargetDesc/ARMAddressingModes.h"
+#include "llvm/CodeGen/LivePhysRegs.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -63,7 +64,8 @@ namespace {
     void TransferImpOps(MachineInstr &OldMI,
                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
     bool ExpandMI(MachineBasicBlock &MBB,
-                  MachineBasicBlock::iterator MBBI);
+                  MachineBasicBlock::iterator MBBI,
+                  MachineBasicBlock::iterator &NextMBBI);
     bool ExpandMBB(MachineBasicBlock &MBB);
     void ExpandVLD(MachineBasicBlock::iterator &MBBI);
     void ExpandVST(MachineBasicBlock::iterator &MBBI);
@@ -72,6 +74,14 @@
                     unsigned Opc, bool IsExt);
     void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator &MBBI);
+    bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
+                        MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
+                        unsigned StrexOp, unsigned UxtOp,
+                        MachineBasicBlock::iterator &NextMBBI);
+
+    bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
+                           MachineBasicBlock::iterator MBBI,
+                           MachineBasicBlock::iterator &NextMBBI);
   };
   char ARMExpandPseudo::ID = 0;
 }
@@ -742,8 +752,240 @@ void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
   MI.eraseFromParent();
 }
 
+static void addPostLoopLiveIns(MachineBasicBlock *MBB, LivePhysRegs &LiveRegs) {
+  for (auto I = LiveRegs.begin(); I != LiveRegs.end(); ++I)
+    MBB->addLiveIn(*I);
+}
+
+/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
+/// possible. This only gets used at -O0 so we don't care about efficiency of
+/// the generated code.
+bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
+                                     MachineBasicBlock::iterator MBBI,
+                                     unsigned LdrexOp, unsigned StrexOp,
+                                     unsigned UxtOp,
+                                     MachineBasicBlock::iterator &NextMBBI) {
+  bool IsThumb = STI->isThumb();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &Dest = MI.getOperand(0);
+  unsigned StatusReg = MI.getOperand(1).getReg();
+  MachineOperand &Addr = MI.getOperand(2);
+  MachineOperand &Desired = MI.getOperand(3);
+  MachineOperand &New = MI.getOperand(4);
+
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(&MBB);
+  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadCmpBB);
+  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+  MF->insert(++StoreBB->getIterator(), DoneBB);
+
+  if (UxtOp) {
+    MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII->get(UxtOp), Desired.getReg())
+            .addReg(Desired.getReg(), RegState::Kill);
+    if (!IsThumb)
+      MIB.addImm(0);
+    AddDefaultPred(MIB);
+  }
+
+  // .Lloadcmp:
+  //     ldrex rDest, [rAddr]
+  //     cmp rDest, rDesired
+  //     bne .Ldone
+  MBB.addSuccessor(LoadCmpBB);
+  LoadCmpBB->addLiveIn(Addr.getReg());
+  LoadCmpBB->addLiveIn(Dest.getReg());
+  LoadCmpBB->addLiveIn(Desired.getReg());
+  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
+  MIB.addReg(Addr.getReg());
+  if (LdrexOp == ARM::t2LDREX)
+    MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
+  AddDefaultPred(MIB);
+
+  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
+  AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+                     .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
+                     .addOperand(Desired));
+  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
+  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
+      .addMBB(DoneBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  LoadCmpBB->addSuccessor(DoneBB);
+  LoadCmpBB->addSuccessor(StoreBB);
+
+  // .Lstore:
+  //     strex rStatus, rNew, [rAddr]
+  //     cmp rStatus, #0
+  //     bne .Lloadcmp
+  StoreBB->addLiveIn(Addr.getReg());
+  StoreBB->addLiveIn(New.getReg());
+  addPostLoopLiveIns(StoreBB, LiveRegs);
+
+
+  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), StatusReg);
+  MIB.addOperand(New);
+  MIB.addOperand(Addr);
+  if (StrexOp == ARM::t2STREX)
+    MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
+  AddDefaultPred(MIB);
+
+  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+  AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
+                     .addReg(StatusReg, RegState::Kill)
+                     .addImm(0));
+  BuildMI(StoreBB, DL, TII->get(Bcc))
+      .addMBB(LoadCmpBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  StoreBB->addSuccessor(LoadCmpBB);
+  StoreBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+  addPostLoopLiveIns(DoneBB, LiveRegs);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
+/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
+/// single GPRPair register), Thumb's take two separate registers so we need to
+/// extract the subregs from the pair.
+static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
+                                unsigned Flags, bool IsThumb,
+                                const TargetRegisterInfo *TRI) {
+  if (IsThumb) {
+    unsigned RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
+    unsigned RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
+    MIB.addReg(RegLo, Flags | getKillRegState(Reg.isDead()));
+    MIB.addReg(RegHi, Flags | getKillRegState(Reg.isDead()));
+  } else
+    MIB.addReg(Reg.getReg(), Flags | getKillRegState(Reg.isDead()));
+}
+
+/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
+bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
+                                        MachineBasicBlock::iterator MBBI,
+                                        MachineBasicBlock::iterator &NextMBBI) {
+  bool IsThumb = STI->isThumb();
+  MachineInstr &MI = *MBBI;
+  DebugLoc DL = MI.getDebugLoc();
+  MachineOperand &Dest = MI.getOperand(0);
+  unsigned StatusReg = MI.getOperand(1).getReg();
+  MachineOperand &Addr = MI.getOperand(2);
+  MachineOperand &Desired = MI.getOperand(3);
+  MachineOperand &New = MI.getOperand(4);
+
+  unsigned DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
+  unsigned DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
+  unsigned DesiredLo = TRI->getSubReg(Desired.getReg(), ARM::gsub_0);
+  unsigned DesiredHi = TRI->getSubReg(Desired.getReg(), ARM::gsub_1);
+
+  LivePhysRegs LiveRegs(&TII->getRegisterInfo());
+  LiveRegs.addLiveOuts(&MBB);
+  for (auto I = std::prev(MBB.end()); I != MBBI; --I)
+    LiveRegs.stepBackward(*I);
+
+  MachineFunction *MF = MBB.getParent();
+  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
+
+  MF->insert(++MBB.getIterator(), LoadCmpBB);
+  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
+  MF->insert(++StoreBB->getIterator(), DoneBB);
+
+  // .Lloadcmp:
+  //     ldrexd rDestLo, rDestHi, [rAddr]
+  //     cmp rDestLo, rDesiredLo
+  //     sbcs rStatus, rDestHi, rDesiredHi
+  //     bne .Ldone
+  MBB.addSuccessor(LoadCmpBB);
+  LoadCmpBB->addLiveIn(Addr.getReg());
+  LoadCmpBB->addLiveIn(Dest.getReg());
+  LoadCmpBB->addLiveIn(Desired.getReg());
+  addPostLoopLiveIns(LoadCmpBB, LiveRegs);
+
+  unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
+  MachineInstrBuilder MIB;
+  MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
+  addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
+  MIB.addReg(Addr.getReg());
+  AddDefaultPred(MIB);
+
+  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
+  AddDefaultPred(BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
+                     .addReg(DestLo, getKillRegState(Dest.isDead()))
+                     .addReg(DesiredLo, getKillRegState(Desired.isDead())));
+
+  unsigned SBCrr = IsThumb ? ARM::t2SBCrr : ARM::SBCrr;
+  MIB = BuildMI(LoadCmpBB, DL, TII->get(SBCrr))
+            .addReg(StatusReg, RegState::Define | RegState::Dead)
+            .addReg(DestHi, getKillRegState(Dest.isDead()))
+            .addReg(DesiredHi, getKillRegState(Desired.isDead()));
+  AddDefaultPred(MIB);
+  MIB.addReg(ARM::CPSR, RegState::Kill);
+
+  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
+  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
+      .addMBB(DoneBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  LoadCmpBB->addSuccessor(DoneBB);
+  LoadCmpBB->addSuccessor(StoreBB);
+
+  // .Lstore:
+  //     strexd rStatus, rNewLo, rNewHi, [rAddr]
+  //     cmp rStatus, #0
+  //     bne .Lloadcmp
+  StoreBB->addLiveIn(Addr.getReg());
+  StoreBB->addLiveIn(New.getReg());
+  addPostLoopLiveIns(StoreBB, LiveRegs);
+
+  unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
+  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), StatusReg);
+  addExclusiveRegPair(MIB, New, 0, IsThumb, TRI);
+  MIB.addOperand(Addr);
+  AddDefaultPred(MIB);
+
+  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
+  AddDefaultPred(BuildMI(StoreBB, DL, TII->get(CMPri))
+                     .addReg(StatusReg, RegState::Kill)
+                     .addImm(0));
+  BuildMI(StoreBB, DL, TII->get(Bcc))
+      .addMBB(LoadCmpBB)
+      .addImm(ARMCC::NE)
+      .addReg(ARM::CPSR, RegState::Kill);
+  StoreBB->addSuccessor(LoadCmpBB);
+  StoreBB->addSuccessor(DoneBB);
+
+  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
+  DoneBB->transferSuccessors(&MBB);
+  addPostLoopLiveIns(DoneBB, LiveRegs);
+
+  NextMBBI = MBB.end();
+  MI.eraseFromParent();
+  return true;
+}
+
+
 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
-                               MachineBasicBlock::iterator MBBI) {
+                               MachineBasicBlock::iterator MBBI,
+                               MachineBasicBlock::iterator &NextMBBI) {
   MachineInstr &MI = *MBBI;
   unsigned Opcode = MI.getOpcode();
   switch (Opcode) {
@@ -1380,6 +1622,30 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
     case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
     case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
     case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
+
+    case ARM::CMP_SWAP_8:
+      if (STI->isThumb())
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB,
+                              ARM::tUXTB, NextMBBI);
+      else
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB,
+                              ARM::UXTB, NextMBBI);
+    case ARM::CMP_SWAP_16:
+      if (STI->isThumb())
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH,
+                              ARM::tUXTH, NextMBBI);
+      else
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH,
+                              ARM::UXTH, NextMBBI);
+    case ARM::CMP_SWAP_32:
+      if (STI->isThumb())
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
+                              NextMBBI);
+      else
+        return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
+
+    case ARM::CMP_SWAP_64:
+      return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
   }
 }
 
@@ -1389,7 +1655,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
   while (MBBI != E) {
     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
-    Modified |= ExpandMI(MBB, MBBI);
+    Modified |= ExpandMI(MBB, MBBI, NMBBI);
     MBBI = NMBBI;
   }
 
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 146fa6b32bd..ac1b7d474cb 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -253,6 +253,8 @@ private:
 
   SDNode *SelectSMLAWSMULW(SDNode *N);
 
+  SDNode *SelectCMP_SWAP(SDNode *N);
+
   /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
   /// inline asm expressions.
   bool SelectInlineAsmMemoryOperand(const SDValue &Op, unsigned ConstraintID,
@@ -2597,6 +2599,34 @@ SDNode *ARMDAGToDAGISel::SelectSMLAWSMULW(SDNode *N) {
   return nullptr;
 }
 
+/// We've got special pseudo-instructions for these
+SDNode *ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
+  unsigned Opcode;
+  EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
+  if (MemTy == MVT::i8)
+    Opcode = ARM::CMP_SWAP_8;
+  else if (MemTy == MVT::i16)
+    Opcode = ARM::CMP_SWAP_16;
+  else if (MemTy == MVT::i32)
+    Opcode = ARM::CMP_SWAP_32;
+  else
+    llvm_unreachable("Unknown AtomicCmpSwap type");
+
+  SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = CurDAG->getMachineNode(
+      Opcode, SDLoc(N),
+      CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
+
+  MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
+  ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
+  return nullptr;
+}
+
 SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) {
   // The only time a CONCAT_VECTORS operation can have legal types is when
   // two 64-bit vectors are concatenated to a 128-bit vector.
@@ -3493,6 +3523,9 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) {
 
   case ISD::CONCAT_VECTORS:
     return SelectConcatVector(N);
+
+  case ISD::ATOMIC_CMP_SWAP:
+    return SelectCMP_SWAP(N);
   }
 
   return SelectCode(N);
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 95354740185..9d210375c12 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -850,10 +850,12 @@ ARMTargetLowering::ARMTargetLowering(const TargetMachine &TM,
     // ATOMIC_FENCE needs custom lowering; the others should have been expanded
     // to ldrex/strex loops already.
     setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom);
+    if (!Subtarget->isThumb() || !Subtarget->isMClass())
+      setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom);
 
     // On v8, we have particularly efficient implementations of atomic fences
     // if they can be combined with nearby atomic loads and stores.
-    if (!Subtarget->hasV8Ops()) {
+    if (!Subtarget->hasV8Ops() || getTargetMachine().getOptLevel() == 0) {
       // Automatically insert fences (dmb ish) around ATOMIC_SWAP etc.
       InsertFencesForAtomic = true;
     }
@@ -6969,6 +6971,44 @@ static void ReplaceREADCYCLECOUNTER(SDNode *N,
   Results.push_back(Cycles32.getValue(1));
 }
 
+static SDValue createGPRPairNode(SelectionDAG &DAG, SDValue V0, SDValue V1) {
+  SDLoc dl(V0.getNode());
+  SDValue RegClass =
+      DAG.getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
+  SDValue SubReg0 = DAG.getTargetConstant(ARM::gsub_0, dl, MVT::i32);
+  SDValue SubReg1 = DAG.getTargetConstant(ARM::gsub_1, dl, MVT::i32);
+  const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
+  return SDValue(
+      DAG.getMachineNode(TargetOpcode::REG_SEQUENCE, dl, MVT::Untyped, Ops), 0);
+}
+
+static void ReplaceCMP_SWAP_64Results(SDNode *N,
+                                      SmallVectorImpl<SDValue> &Results,
+                                      SelectionDAG &DAG) {
+  assert(N->getValueType(0) == MVT::i64 &&
+         "AtomicCmpSwap on types less than 64 should be legal");
+  SDValue Ops[] = {N->getOperand(1),
+                   createGPRPairNode(DAG, N->getOperand(2)->getOperand(0),
+                                     N->getOperand(2)->getOperand(1)),
+                   createGPRPairNode(DAG, N->getOperand(3)->getOperand(0),
+                                     N->getOperand(3)->getOperand(1)),
+                   N->getOperand(0)};
+  SDNode *CmpSwap = DAG.getMachineNode(
+      ARM::CMP_SWAP_64, SDLoc(N),
+      DAG.getVTList(MVT::Untyped, MVT::i32, MVT::Other), Ops);
+
+  MachineFunction &MF = DAG.getMachineFunction();
+  MachineSDNode::mmo_iterator MemOp = MF.allocateMemRefsArray(1);
+  MemOp[0] = cast<MemSDNode>(N)->getMemOperand();
+  cast<MachineSDNode>(CmpSwap)->setMemRefs(MemOp, MemOp + 1);
+
+  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_0, SDLoc(N), MVT::i32,
+                                               SDValue(CmpSwap, 0)));
+  Results.push_back(DAG.getTargetExtractSubreg(ARM::gsub_1, SDLoc(N), MVT::i32,
+                                               SDValue(CmpSwap, 0)));
+  Results.push_back(SDValue(CmpSwap, 2));
+}
+
 SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
   switch (Op.getOpcode()) {
   default: llvm_unreachable("Don't know how to custom lower this!");
@@ -7097,6 +7137,9 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
     assert(Subtarget->isTargetWindows() && "can only expand DIV on Windows");
     return ExpandDIV_Windows(SDValue(N, 0), DAG, N->getOpcode() == ISD::SDIV,
                              Results);
+  case ISD::ATOMIC_CMP_SWAP:
+    ReplaceCMP_SWAP_64Results(N, Results, DAG);
+    return;
   }
   if (Res.getNode())
     Results.push_back(Res);
@@ -12155,7 +12198,12 @@ ARMTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
 
 bool ARMTargetLowering::shouldExpandAtomicCmpXchgInIR(
     AtomicCmpXchgInst *AI) const {
-  return true;
+  // At -O0, fast-regalloc cannot cope with the live vregs necessary to
+  // implement cmpxchg without spilling. If the address being exchanged is also
+  // on the stack and close enough to the spill slot, this can lead to a
+  // situation where the monitor always gets cleared and the atomic operation
+  // can never succeed. So at -O0 we need a late-expanded pseudo-inst instead.
+  return getTargetMachine().getOptLevel() != 0;
 }
 
 bool ARMTargetLowering::shouldInsertFencesForAtomic(
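
For illustration only (hypothetical example, not part of the patch): with shouldExpandAtomicCmpXchgInIR() now returning false at -O0, a cmpxchg like the one below survives AtomicExpandPass, reaches instruction selection intact, and should be matched to the new CMP_SWAP_32 pseudo, which ARMExpandPseudoInsts later rewrites into an ldrex/strex loop. The function name and command are made up; test/CodeGen/ARM/cmpxchg-O0.ll below is the real coverage.

; Build with something like: llc -mtriple=armv7-linux-gnu -O0 -verify-machineinstrs reduced.ll -o -
define i32 @reduced_cmpxchg(i32* %p, i32 %old, i32 %new) nounwind {
  %pair = cmpxchg i32* %p, i32 %old, i32 %new seq_cst monotonic
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}
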
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index d116e9d08a3..3f32dd7c363 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -5793,3 +5793,36 @@ let mayLoad = 1, mayStore =1, hasSideEffects = 1 in
 def SPACE : PseudoInst<(outs GPR:$Rd), (ins i32imm:$size, GPR:$Rn),
                        NoItinerary,
                        [(set GPR:$Rd, (int_arm_space imm:$size, GPR:$Rn))]>;
+
+//===----------------------------------
+// Atomic cmpxchg for -O0
+//===----------------------------------
+
+// The fast register allocator used during -O0 inserts spills to cover any VRegs
+// live across basic block boundaries. When this happens between an LDREX and an
+// STREX it can clear the exclusive monitor, causing all cmpxchg attempts to
+// fail.
+
+// Unfortunately, this means we have to have an alternative (expanded
+// post-regalloc) path for -O0 compilations. Fortunately this path can be
+// significantly more naive than the standard expansion: we conservatively
+// assume seq_cst, strong cmpxchg and omit clrex on failure.
+
+let Constraints = "@earlyclobber $Rd,@earlyclobber $status",
+    mayLoad = 1, mayStore = 1 in {
+def CMP_SWAP_8 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+                            (ins GPR:$addr, GPR:$desired, GPR:$new),
+                            NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_16 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+                             (ins GPR:$addr, GPR:$desired, GPR:$new),
+                             NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_32 : PseudoInst<(outs GPR:$Rd, GPR:$status),
+                             (ins GPR:$addr, GPR:$desired, GPR:$new),
+                             NoItinerary, []>, Sched<[]>;
+
+def CMP_SWAP_64 : PseudoInst<(outs GPRPair:$Rd, GPR:$status),
+                             (ins GPR:$addr, GPRPair:$desired, GPRPair:$new),
+                             NoItinerary, []>, Sched<[]>;
+}
diff --git a/test/CodeGen/ARM/cmpxchg-O0.ll b/test/CodeGen/ARM/cmpxchg-O0.ll
new file mode 100644
index 00000000000..0bf22b05a76
--- /dev/null
+++ b/test/CodeGen/ARM/cmpxchg-O0.ll
@@ -0,0 +1,81 @@
+; RUN: llc -verify-machineinstrs -mtriple=armv7-linux-gnu -O0 %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=thumbv8-linux-gnu -O0 %s -o - | FileCheck %s
+; RUN: llc -verify-machineinstrs -mtriple=thumbv6m-none-eabi -O0 %s -o - | FileCheck %s --check-prefix=CHECK-T1
+
+; CHECK-T1-NOT: ldrex
+; CHECK-T1-NOT: strex
+
+define { i8, i1 } @test_cmpxchg_8(i8* %addr, i8 %desired, i8 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_8:
+; CHECK: dmb ish
+; CHECK: uxtb [[DESIRED:r[0-9]+]], [[DESIRED]]
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexb [[OLD:r[0-9]+]], [r0]
+; CHECK: cmp [[OLD]], [[DESIRED]]
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strexb [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
+; CHECK: dmb ish
+  %res = cmpxchg i8* %addr, i8 %desired, i8 %new seq_cst monotonic
+  ret { i8, i1 } %res
+}
+
+define { i16, i1 } @test_cmpxchg_16(i16* %addr, i16 %desired, i16 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_16:
+; CHECK: dmb ish
+; CHECK: uxth [[DESIRED:r[0-9]+]], [[DESIRED]]
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexh [[OLD:r[0-9]+]], [r0]
+; CHECK: cmp [[OLD]], [[DESIRED]]
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strexh [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
+; CHECK: dmb ish
+  %res = cmpxchg i16* %addr, i16 %desired, i16 %new seq_cst monotonic
+  ret { i16, i1 } %res
+}
+
+define { i32, i1 } @test_cmpxchg_32(i32* %addr, i32 %desired, i32 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_32:
+; CHECK: dmb ish
+; CHECK-NOT: uxt
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrex [[OLD:r[0-9]+]], [r0]
+; CHECK: cmp [[OLD]], [[DESIRED]]
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strex [[STATUS:r[0-9]+]], r2, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: cmp{{(\.w)?}} [[OLD]], [[DESIRED]]
+; CHECK: {{moveq.w|movweq}} {{r[0-9]+}}, #1
+; CHECK: dmb ish
+  %res = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
+  ret { i32, i1 } %res
+}
+
+define { i64, i1 } @test_cmpxchg_64(i64* %addr, i64 %desired, i64 %new) nounwind {
+; CHECK-LABEL: test_cmpxchg_64:
+; CHECK: dmb ish
+; CHECK-NOT: uxt
+; CHECK: [[RETRY:.LBB[0-9]+_[0-9]+]]:
+; CHECK: ldrexd [[OLDLO:r[0-9]+]], [[OLDHI:r[0-9]+]], [r0]
+; CHECK: cmp [[OLDLO]], r6
+; CHECK: sbcs{{(\.w)?}} [[STATUS:r[0-9]+]], [[OLDHI]], r7
+; CHECK: bne [[DONE:.LBB[0-9]+_[0-9]+]]
+; CHECK: strexd [[STATUS]], r4, r5, [r0]
+; CHECK: cmp{{(\.w)?}} [[STATUS]], #0
+; CHECK: bne [[RETRY]]
+; CHECK: [[DONE]]:
+; CHECK: dmb ish
+  %res = cmpxchg i64* %addr, i64 %desired, i64 %new seq_cst monotonic
+  ret { i64, i1 } %res
+}
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 31d970f7ef1..5e84460b9c0 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand -codegen-opt-level=1 %s | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 7bb6ffed397..8397182e7e8 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s -codegen-opt-level=1 | FileCheck %s
 
 define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
 ; CHECK-LABEL: @test_atomic_xchg_i8
diff --git a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index 02e4dd1f1d5..375b41a26db 100644
--- a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -1,4 +1,4 @@
-; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+; RUN: opt -atomic-expand -codegen-opt-level=1 -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
 
 define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
 ; CHECK-LABEL: @test_cmpxchg_seq_cst
diff --git a/tools/opt/opt.cpp b/tools/opt/opt.cpp
index ca3ab8a49ab..96dee1ba598 100644
--- a/tools/opt/opt.cpp
+++ b/tools/opt/opt.cpp
@@ -136,6 +136,10 @@ static cl::opt<bool>
 OptLevelO3("O3",
            cl::desc("Optimization level 3. Similar to clang -O3"));
 
+static cl::opt<unsigned>
+CodeGenOptLevel("codegen-opt-level",
+                cl::desc("Override optimization level for codegen hooks"));
+
 static cl::opt<std::string>
 TargetTriple("mtriple", cl::desc("Override target triple for module"));
 
@@ -272,6 +276,8 @@ static void AddStandardLinkPasses(legacy::PassManagerBase &PM) {
 //
 static CodeGenOpt::Level GetCodeGenOptLevel() {
+  if (CodeGenOptLevel.getNumOccurrences())
+    return static_cast<CodeGenOpt::Level>(unsigned(CodeGenOptLevel));
   if (OptLevelO1)
     return CodeGenOpt::Less;
   if (OptLevelO2)
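
For illustration only (hypothetical usage, not part of the patch): the RUN-line updates above need the new -codegen-opt-level flag because opt otherwise reports CodeGenOpt::None to the target hooks, and shouldExpandAtomicCmpXchgInIR() now declines to expand cmpxchg at that level. A sketch of how a test can keep exercising the IR-level expansion; the CHECK lines here are indicative rather than verified output:

; RUN: opt -S -mtriple=armv7-apple-ios7.0 -atomic-expand -codegen-opt-level=1 %s | FileCheck %s
define i32 @sketch_cmpxchg(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @sketch_cmpxchg
; CHECK: call i32 @llvm.arm.ldrex
; CHECK: call i32 @llvm.arm.strex
  %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst monotonic
  %val = extractvalue { i32, i1 } %pair, 0
  ret i32 %val
}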