ARM: Thumb2 LDRD/STRD supports independent input/output regs

The existing code would unnecessarily break LDRD/STRD apart when the
registers are non-adjacent; on Thumb2 this is not necessary.

Ideally on Thumb2 we shouldn't match for LDRD/STRD pre-regalloc anymore,
as there is no reason to set register hints anymore; changing that is
something for a future patch, however.

Differential Revision: http://reviews.llvm.org/D9694

Recommitting after the revert in r238821: the buildbot still failed with
the patch removed, so there seems to be another reason for the breakage.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238935 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Matthias Braun 2015-06-03 16:30:24 +00:00
parent e017a69163
commit e942914d29
4 changed files with 48 additions and 30 deletions

View File

@ -1470,8 +1470,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI) { MachineBasicBlock::iterator &MBBI) {
MachineInstr *MI = &*MBBI; MachineInstr *MI = &*MBBI;
unsigned Opcode = MI->getOpcode(); unsigned Opcode = MI->getOpcode();
if (Opcode == ARM::LDRD || Opcode == ARM::STRD || if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
const MachineOperand &BaseOp = MI->getOperand(2); const MachineOperand &BaseOp = MI->getOperand(2);
unsigned BaseReg = BaseOp.getReg(); unsigned BaseReg = BaseOp.getReg();
unsigned EvenReg = MI->getOperand(0).getReg(); unsigned EvenReg = MI->getOperand(0).getReg();
@ -1949,10 +1948,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
bool bool
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1, ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
DebugLoc &dl, DebugLoc &dl, unsigned &NewOpc,
unsigned &NewOpc, unsigned &EvenReg, unsigned &FirstReg,
unsigned &OddReg, unsigned &BaseReg, unsigned &SecondReg,
int &Offset, unsigned &PredReg, unsigned &BaseReg, int &Offset,
unsigned &PredReg,
ARMCC::CondCodes &Pred, ARMCC::CondCodes &Pred,
bool &isT2) { bool &isT2) {
// Make sure we're allowed to generate LDRD/STRD. // Make sure we're allowed to generate LDRD/STRD.
@ -2011,9 +2011,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
return false; return false;
Offset = ARM_AM::getAM3Opc(AddSub, OffImm); Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
} }
EvenReg = Op0->getOperand(0).getReg(); FirstReg = Op0->getOperand(0).getReg();
OddReg = Op1->getOperand(0).getReg(); SecondReg = Op1->getOperand(0).getReg();
if (EvenReg == OddReg) if (FirstReg == SecondReg)
return false; return false;
BaseReg = Op0->getOperand(1).getReg(); BaseReg = Op0->getOperand(1).getReg();
Pred = getInstrPredicate(Op0, PredReg); Pred = getInstrPredicate(Op0, PredReg);
@ -2109,7 +2109,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
// to try to allocate a pair of registers that can form register pairs. // to try to allocate a pair of registers that can form register pairs.
MachineInstr *Op0 = Ops.back(); MachineInstr *Op0 = Ops.back();
MachineInstr *Op1 = Ops[Ops.size()-2]; MachineInstr *Op1 = Ops[Ops.size()-2];
unsigned EvenReg = 0, OddReg = 0; unsigned FirstReg = 0, SecondReg = 0;
unsigned BaseReg = 0, PredReg = 0; unsigned BaseReg = 0, PredReg = 0;
ARMCC::CondCodes Pred = ARMCC::AL; ARMCC::CondCodes Pred = ARMCC::AL;
bool isT2 = false; bool isT2 = false;
@ -2117,21 +2117,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
int Offset = 0; int Offset = 0;
DebugLoc dl; DebugLoc dl;
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc, if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
EvenReg, OddReg, BaseReg, FirstReg, SecondReg, BaseReg,
Offset, PredReg, Pred, isT2)) { Offset, PredReg, Pred, isT2)) {
Ops.pop_back(); Ops.pop_back();
Ops.pop_back(); Ops.pop_back();
const MCInstrDesc &MCID = TII->get(NewOpc); const MCInstrDesc &MCID = TII->get(NewOpc);
const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF); const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
MRI->constrainRegClass(EvenReg, TRC); MRI->constrainRegClass(FirstReg, TRC);
MRI->constrainRegClass(OddReg, TRC); MRI->constrainRegClass(SecondReg, TRC);
// Form the pair instruction. // Form the pair instruction.
if (isLd) { if (isLd) {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
.addReg(EvenReg, RegState::Define) .addReg(FirstReg, RegState::Define)
.addReg(OddReg, RegState::Define) .addReg(SecondReg, RegState::Define)
.addReg(BaseReg); .addReg(BaseReg);
// FIXME: We're converting from LDRi12 to an insn that still // FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should // uses addrmode2, so we need an explicit offset reg. It should
@ -2144,8 +2144,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
++NumLDRDFormed; ++NumLDRDFormed;
} else { } else {
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID) MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
.addReg(EvenReg) .addReg(FirstReg)
.addReg(OddReg) .addReg(SecondReg)
.addReg(BaseReg); .addReg(BaseReg);
// FIXME: We're converting from LDRi12 to an insn that still // FIXME: We're converting from LDRi12 to an insn that still
// uses addrmode2, so we need an explicit offset reg. It should // uses addrmode2, so we need an explicit offset reg. It should
@ -2160,9 +2160,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
MBB->erase(Op0); MBB->erase(Op0);
MBB->erase(Op1); MBB->erase(Op1);
// Add register allocation hints to form register pairs. if (!isT2) {
MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); // Add register allocation hints to form register pairs.
MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
}
} else { } else {
for (unsigned i = 0; i != NumMove; ++i) { for (unsigned i = 0; i != NumMove; ++i) {
MachineInstr *Op = Ops.back(); MachineInstr *Op = Ops.back();

View File

@ -664,7 +664,7 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there. ; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@ -782,7 +782,7 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there. ; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@ -900,7 +900,7 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there. ; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@ -1018,7 +1018,7 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
; CHECK: movt r[[ADDR]], :upper16:var64 ; CHECK: movt r[[ADDR]], :upper16:var64
; CHECK: .LBB{{[0-9]+}}_1: ; CHECK: .LBB{{[0-9]+}}_1:
; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]] ; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
; r0, r1 below is a reasonable guess but could change: it certainly comes into the ; r0, r1 below is a reasonable guess but could change: it certainly comes into the
; function there. ; function there.
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0 ; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
@ -1146,10 +1146,12 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
; function there. ; function there.
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 ; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 ; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
; CHECK-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]] ; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1 ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0 ; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
; CHECK-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]] ; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3 ; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
; CHECK-NEXT: BB#2: ; CHECK-NEXT: BB#2:
; As above, r2, r3 is a reasonable guess. ; As above, r2, r3 is a reasonable guess.

View File

@ -92,6 +92,22 @@ entry:
ret void ret void
} }
declare void @extfunc(i32, i32, i32, i32)
; CHECK-LABEL: Func2:
; A8: ldrd
; A8: blx
; A8: pop
; Test input: loads the two consecutive i32 words at %p[0] and %p[1] and
; passes them to @extfunc as its first and fourth arguments, with constant
; zeros in between.
define void @Func2(i32* %p) {
entry:
; Addresses of element 0 and element 1 of %p (adjacent words).
%addr0 = getelementptr i32, i32* %p, i32 0
%addr1 = getelementptr i32, i32* %p, i32 1
%v0 = load i32, i32* %addr0
%v1 = load i32, i32* %addr1
; try to force %v0/%v1 into non-adjacent registers
; NOTE(review): presumably this works because the first and fourth i32
; arguments are passed in non-consecutive argument registers under the ARM
; calling convention -- confirm against the target ABI.
call void @extfunc(i32 %v0, i32 0, i32 0, i32 %v1)
ret void
}
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind

View File

@ -109,7 +109,7 @@ entry:
define double @load_d(double* %a) { define double @load_d(double* %a) {
entry: entry:
; CHECK-LABEL: load_d: ; CHECK-LABEL: load_d:
; NONE: ldm r0, {r0, r1} ; NONE: ldrd r0, r1, [r0]
; HARD: vldr d0, [r0] ; HARD: vldr d0, [r0]
%0 = load double, double* %a, align 8 %0 = load double, double* %a, align 8
ret double %0 ret double %0
@ -127,9 +127,7 @@ entry:
define void @store_d(double* %a, double %b) { define void @store_d(double* %a, double %b) {
entry: entry:
; CHECK-LABEL: store_d: ; CHECK-LABEL: store_d:
; NONE: mov r1, r3 ; NONE: strd r2, r3, [r0]
; NONE: str r2, [r0]
; NONE: str r1, [r0, #4]
; HARD: vstr d0, [r0] ; HARD: vstr d0, [r0]
store double %b, double* %a, align 8 store double %b, double* %a, align 8
ret void ret void