diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index 3d283a5ac95..6beb1abd222 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -5969,9 +5969,6 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
     MRI.constrainRegClass(ptr, &ARM::rGPRRegClass);
   }
 
-  unsigned ldrOpc = isThumb2 ? ARM::t2LDREXD : ARM::LDREXD;
-  unsigned strOpc = isThumb2 ? ARM::t2STREXD : ARM::STREXD;
-
   MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB);
   MachineBasicBlock *contBB = 0, *cont2BB = 0;
   if (IsCmpxchg || IsMinMax)
@@ -6009,42 +6006,26 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
   //   cmp storesuccess, #0
   //   bne- loopMBB
   //   fallthrough --> exitMBB
-  //
-  // Note that the registers are explicitly specified because there is not any
-  // way to force the register allocator to allocate a register pair.
-  //
-  // FIXME: The hardcoded registers are not necessary for Thumb2, but we
-  // need to properly enforce the restriction that the two output registers
-  // for ldrexd must be different.
 
   BB = loopMBB;
+
   // Load
-  unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-  unsigned GPRPair1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-  unsigned GPRPair2;
-  if (IsMinMax) {
-    //We need an extra double register for doing min/max.
-    unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    GPRPair2 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
-      .addReg(undef)
-      .addReg(vallo)
-      .addImm(ARM::gsub_0);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair2)
-      .addReg(r1)
-      .addReg(valhi)
-      .addImm(ARM::gsub_1);
+  if (isThumb2) {
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD))
+                   .addReg(destlo, RegState::Define)
+                   .addReg(desthi, RegState::Define)
+                   .addReg(ptr));
+  } else {
+    unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD))
+                   .addReg(GPRPair0, RegState::Define).addReg(ptr));
+    // Copy r2/r3 into dest. (This copy will normally be coalesced.)
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
+      .addReg(GPRPair0, 0, ARM::gsub_0);
+    BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
+      .addReg(GPRPair0, 0, ARM::gsub_1);
   }
-  AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc))
-                 .addReg(GPRPair0, RegState::Define).addReg(ptr));
-  // Copy r2/r3 into dest. (This copy will normally be coalesced.)
-  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo)
-    .addReg(GPRPair0, 0, ARM::gsub_0);
-  BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi)
-    .addReg(GPRPair0, 0, ARM::gsub_1);
-
+  unsigned StoreLo, StoreHi;
   if (IsCmpxchg) {
     // Add early exit
     for (unsigned i = 0; i < 2; i++) {
@@ -6060,19 +6041,8 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
     }
 
     // Copy to physregs for strexd
-    unsigned setlo = MI->getOperand(5).getReg();
-    unsigned sethi = MI->getOperand(6).getReg();
-    unsigned undef = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), undef);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
-      .addReg(undef)
-      .addReg(setlo)
-      .addImm(ARM::gsub_0);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
-      .addReg(r1)
-      .addReg(sethi)
-      .addImm(ARM::gsub_1);
+    StoreLo = MI->getOperand(5).getReg();
+    StoreHi = MI->getOperand(6).getReg();
   } else if (Op1) {
     // Perform binary operation
     unsigned tmpRegLo = MRI.createVirtualRegister(TRC);
@@ -6084,32 +6054,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
                    .addReg(desthi).addReg(valhi))
       .addReg(IsMinMax ? ARM::CPSR : 0, getDefRegState(IsMinMax));
 
-    unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
-    unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
-      .addReg(UndefPair)
-      .addReg(tmpRegLo)
-      .addImm(ARM::gsub_0);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
-      .addReg(r1)
-      .addReg(tmpRegHi)
-      .addImm(ARM::gsub_1);
+    StoreLo = tmpRegLo;
+    StoreHi = tmpRegHi;
   } else {
     // Copy to physregs for strexd
-    unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
-    BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
-      .addReg(UndefPair)
-      .addReg(vallo)
-      .addImm(ARM::gsub_0);
-    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), GPRPair1)
-      .addReg(r1)
-      .addReg(valhi)
-      .addImm(ARM::gsub_1);
+    StoreLo = vallo;
+    StoreHi = valhi;
  }
-  unsigned GPRPairStore = GPRPair1;
   if (IsMinMax) {
     // Compare and branch to exit block.
     BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
@@ -6117,12 +6068,33 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB,
     BB->addSuccessor(exitMBB);
     BB->addSuccessor(contBB);
     BB = contBB;
-    GPRPairStore = GPRPair2;
+    StoreLo = vallo;
+    StoreHi = valhi;
   }
 
   // Store
-  AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess)
-                 .addReg(GPRPairStore).addReg(ptr));
+  if (isThumb2) {
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess)
+                   .addReg(StoreLo).addReg(StoreHi).addReg(ptr));
+  } else {
+    // Marshal a pair...
+    unsigned StorePair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    unsigned UndefPair = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    unsigned r1 = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
+    BuildMI(BB, dl, TII->get(TargetOpcode::IMPLICIT_DEF), UndefPair);
+    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), r1)
+      .addReg(UndefPair)
+      .addReg(StoreLo)
+      .addImm(ARM::gsub_0);
+    BuildMI(BB, dl, TII->get(TargetOpcode::INSERT_SUBREG), StorePair)
+      .addReg(r1)
+      .addReg(StoreHi)
+      .addImm(ARM::gsub_1);
+
+    // ...and store it
+    AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess)
+                   .addReg(StorePair).addReg(ptr));
+  }
   // Cmp+jump
   AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
                  .addReg(storesuccess).addImm(0));
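This split is the heart of the patch. ARM-mode LDREXD and STREXD address their 64-bit operand through an even/odd register pair (Rt and Rt+1), which the backend models as a single GPRPair operand assembled with IMPLICIT_DEF and INSERT_SUBREG, as above. The Thumb2 encodings t2LDREXD and t2STREXD instead take two independent GPR operands that only need to be distinct registers, so the pair marshalling can be skipped entirely on that path. A rough sketch of the difference at the assembly level (the register choices are illustrative, not taken from real llc output):

    @ ARM mode: Rt must be even and Rt2 must be Rt+1
    ldrexd  r2, r3, [r0]
    strexd  r1, r2, r3, [r0]

    @ Thumb2: Rt and Rt2 may be any two distinct registers
    ldrexd  r2, r5, [r0]
    strexd  r1, r2, r5, [r0]

The tests below pin down the new Thumb2 expansion: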
diff --git a/test/CodeGen/ARM/atomic-64bit.ll b/test/CodeGen/ARM/atomic-64bit.ll
index 4ac5e5d3233..f2c7305ff33 100644
--- a/test/CodeGen/ARM/atomic-64bit.ll
+++ b/test/CodeGen/ARM/atomic-64bit.ll
@@ -1,4 +1,5 @@
 ; RUN: llc < %s -mtriple=armv7-apple-ios | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-none-linux-gnueabihf | FileCheck %s --check-prefix=CHECK-THUMB
 
 define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK: test1:
@@ -10,6 +11,17 @@ define i64 @test1(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test1:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: adds.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: adc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw add i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -24,6 +36,17 @@ define i64 @test2(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test2:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: sbc.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw sub i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -38,6 +61,17 @@ define i64 @test3(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test3:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: and.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: and.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw and i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -52,6 +86,17 @@ define i64 @test4(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test4:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: orr.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: orr.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw or i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -66,6 +111,17 @@ define i64 @test5(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test5:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: eor.w [[REG3:[a-z0-9]+]], [[REG1]]
+; CHECK-THUMB: eor.w [[REG4:[a-z0-9]+]], [[REG2]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw xor i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -78,6 +134,15 @@ define i64 @test6(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test6:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw xchg i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -93,6 +158,19 @@ define i64 @test7(i64* %ptr, i64 %val1, i64 %val2) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test7:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: cmp [[REG1]]
+; CHECK-THUMB: it eq
+; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB: bne
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = cmpxchg i64* %ptr, i64 %val1, i64 %val2 seq_cst
   ret i64 %r
 }
@@ -109,6 +187,18 @@ define i64 @test8(i64* %ptr) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test8:
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: cmp [[REG1]]
+; CHECK-THUMB: it eq
+; CHECK-THUMB: cmpeq [[REG2]]
+; CHECK-THUMB: bne
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = load atomic i64* %ptr seq_cst, align 8
   ret i64 %r
 }
@@ -123,6 +213,15 @@ define void @test9(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test9:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, {{[a-z0-9]+}}, {{[a-z0-9]+}}
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   store atomic i64 %val, i64* %ptr seq_cst, align 8
   ret void
 }
@@ -138,6 +237,18 @@ define i64 @test10(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test10:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: blt
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw min i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -153,6 +264,19 @@ define i64 @test11(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+
+; CHECK-THUMB: test11:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: blo
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw umin i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -168,6 +292,18 @@ define i64 @test12(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test12:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: bge
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
+
   %r = atomicrmw max i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
@@ -183,6 +319,17 @@ define i64 @test13(i64* %ptr, i64 %val) {
 ; CHECK: cmp
 ; CHECK: bne
 ; CHECK: dmb ish
+
+; CHECK-THUMB: test13:
+; CHECK-THUMB: dmb ish
+; CHECK-THUMB: ldrexd [[REG1:[a-z0-9]+]], [[REG2:[a-z0-9]+]]
+; CHECK-THUMB: subs.w {{[a-z0-9]+}}, [[REG1]], [[REG3:[a-z0-9]+]]
+; CHECK-THUMB: sbcs.w {{[a-z0-9]+}}, [[REG2]], [[REG4:[a-z0-9]+]]
+; CHECK-THUMB: bhs
+; CHECK-THUMB: strexd {{[a-z0-9]+}}, [[REG3]], [[REG4]]
+; CHECK-THUMB: cmp
+; CHECK-THUMB: bne
+; CHECK-THUMB: dmb ish
   %r = atomicrmw umax i64* %ptr, i64 %val seq_cst
   ret i64 %r
 }
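For a quick check outside the lit suite, the Thumb2 path can be exercised on a minimal reproducer (the file name is hypothetical; the IR mirrors test1 above):

    $ cat rmw64.ll
    define i64 @fetch_add(i64* %p, i64 %v) {
      %r = atomicrmw add i64* %p, i64 %v seq_cst
      ret i64 %r
    }
    $ llc < rmw64.ll -mtriple=thumbv7-none-linux-gnueabihf

Previously the expansion fed a single GPRPair operand to t2LDREXD/t2STREXD, which does not match their two-GPR Thumb2 operand lists, as the hunks above show. With the patch applied, the output should contain the dmb ish barriers, a ldrexd/strexd loop whose operands need not form an even/odd register pair, and the adds.w/adc.w carry chain that the CHECK-THUMB lines in test1 verify.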