mirror of
https://github.com/RPCS3/llvm.git
synced 2025-01-19 16:35:10 +00:00
ARM: Thumb2 LDRD/STRD supports independent input/output regs
The existing code would unnecessarily break LDRD/STRD apart with non-adjacent registers; on Thumb2 this is not necessary. Ideally on Thumb2 we shouldn't match for ldrd/strd pre-regalloc anymore, as there is no reason to set register hints anymore; changing that is something for a future patch, however. Differential Revision: http://reviews.llvm.org/D9694 Recommitting after the revert in r238821; the buildbot still failed with the patch removed, so there seems to be another reason for the breakage. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@238935 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e017a69163
commit
e942914d29
@ -1470,8 +1470,7 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator &MBBI) {
|
||||
MachineInstr *MI = &*MBBI;
|
||||
unsigned Opcode = MI->getOpcode();
|
||||
if (Opcode == ARM::LDRD || Opcode == ARM::STRD ||
|
||||
Opcode == ARM::t2LDRDi8 || Opcode == ARM::t2STRDi8) {
|
||||
if (Opcode == ARM::LDRD || Opcode == ARM::STRD) {
|
||||
const MachineOperand &BaseOp = MI->getOperand(2);
|
||||
unsigned BaseReg = BaseOp.getReg();
|
||||
unsigned EvenReg = MI->getOperand(0).getReg();
|
||||
@ -1949,10 +1948,11 @@ static void concatenateMemOperands(MachineInstr *MI, MachineInstr *Op0,
|
||||
|
||||
bool
|
||||
ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
|
||||
DebugLoc &dl,
|
||||
unsigned &NewOpc, unsigned &EvenReg,
|
||||
unsigned &OddReg, unsigned &BaseReg,
|
||||
int &Offset, unsigned &PredReg,
|
||||
DebugLoc &dl, unsigned &NewOpc,
|
||||
unsigned &FirstReg,
|
||||
unsigned &SecondReg,
|
||||
unsigned &BaseReg, int &Offset,
|
||||
unsigned &PredReg,
|
||||
ARMCC::CondCodes &Pred,
|
||||
bool &isT2) {
|
||||
// Make sure we're allowed to generate LDRD/STRD.
|
||||
@ -2011,9 +2011,9 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
|
||||
return false;
|
||||
Offset = ARM_AM::getAM3Opc(AddSub, OffImm);
|
||||
}
|
||||
EvenReg = Op0->getOperand(0).getReg();
|
||||
OddReg = Op1->getOperand(0).getReg();
|
||||
if (EvenReg == OddReg)
|
||||
FirstReg = Op0->getOperand(0).getReg();
|
||||
SecondReg = Op1->getOperand(0).getReg();
|
||||
if (FirstReg == SecondReg)
|
||||
return false;
|
||||
BaseReg = Op0->getOperand(1).getReg();
|
||||
Pred = getInstrPredicate(Op0, PredReg);
|
||||
@ -2109,7 +2109,7 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
|
||||
// to try to allocate a pair of registers that can form register pairs.
|
||||
MachineInstr *Op0 = Ops.back();
|
||||
MachineInstr *Op1 = Ops[Ops.size()-2];
|
||||
unsigned EvenReg = 0, OddReg = 0;
|
||||
unsigned FirstReg = 0, SecondReg = 0;
|
||||
unsigned BaseReg = 0, PredReg = 0;
|
||||
ARMCC::CondCodes Pred = ARMCC::AL;
|
||||
bool isT2 = false;
|
||||
@ -2117,21 +2117,21 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
|
||||
int Offset = 0;
|
||||
DebugLoc dl;
|
||||
if (NumMove == 2 && CanFormLdStDWord(Op0, Op1, dl, NewOpc,
|
||||
EvenReg, OddReg, BaseReg,
|
||||
FirstReg, SecondReg, BaseReg,
|
||||
Offset, PredReg, Pred, isT2)) {
|
||||
Ops.pop_back();
|
||||
Ops.pop_back();
|
||||
|
||||
const MCInstrDesc &MCID = TII->get(NewOpc);
|
||||
const TargetRegisterClass *TRC = TII->getRegClass(MCID, 0, TRI, *MF);
|
||||
MRI->constrainRegClass(EvenReg, TRC);
|
||||
MRI->constrainRegClass(OddReg, TRC);
|
||||
MRI->constrainRegClass(FirstReg, TRC);
|
||||
MRI->constrainRegClass(SecondReg, TRC);
|
||||
|
||||
// Form the pair instruction.
|
||||
if (isLd) {
|
||||
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
|
||||
.addReg(EvenReg, RegState::Define)
|
||||
.addReg(OddReg, RegState::Define)
|
||||
.addReg(FirstReg, RegState::Define)
|
||||
.addReg(SecondReg, RegState::Define)
|
||||
.addReg(BaseReg);
|
||||
// FIXME: We're converting from LDRi12 to an insn that still
|
||||
// uses addrmode2, so we need an explicit offset reg. It should
|
||||
@ -2144,8 +2144,8 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
|
||||
++NumLDRDFormed;
|
||||
} else {
|
||||
MachineInstrBuilder MIB = BuildMI(*MBB, InsertPos, dl, MCID)
|
||||
.addReg(EvenReg)
|
||||
.addReg(OddReg)
|
||||
.addReg(FirstReg)
|
||||
.addReg(SecondReg)
|
||||
.addReg(BaseReg);
|
||||
// FIXME: We're converting from LDRi12 to an insn that still
|
||||
// uses addrmode2, so we need an explicit offset reg. It should
|
||||
@ -2160,9 +2160,11 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB,
|
||||
MBB->erase(Op0);
|
||||
MBB->erase(Op1);
|
||||
|
||||
if (!isT2) {
|
||||
// Add register allocation hints to form register pairs.
|
||||
MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg);
|
||||
MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg);
|
||||
MRI->setRegAllocationHint(FirstReg, ARMRI::RegPairEven, SecondReg);
|
||||
MRI->setRegAllocationHint(SecondReg, ARMRI::RegPairOdd, FirstReg);
|
||||
}
|
||||
} else {
|
||||
for (unsigned i = 0; i != NumMove; ++i) {
|
||||
MachineInstr *Op = Ops.back();
|
||||
|
@ -664,7 +664,7 @@ define void @test_atomic_load_min_i64(i64 %offset) nounwind {
|
||||
; CHECK: movt r[[ADDR]], :upper16:var64
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
|
||||
; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
|
||||
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
|
||||
@ -782,7 +782,7 @@ define void @test_atomic_load_max_i64(i64 %offset) nounwind {
|
||||
; CHECK: movt r[[ADDR]], :upper16:var64
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
|
||||
; CHECK: ldrexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
|
||||
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
|
||||
@ -900,7 +900,7 @@ define void @test_atomic_load_umin_i64(i64 %offset) nounwind {
|
||||
; CHECK: movt r[[ADDR]], :upper16:var64
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
|
||||
; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
|
||||
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
|
||||
@ -1018,7 +1018,7 @@ define void @test_atomic_load_umax_i64(i64 %offset) nounwind {
|
||||
; CHECK: movt r[[ADDR]], :upper16:var64
|
||||
|
||||
; CHECK: .LBB{{[0-9]+}}_1:
|
||||
; CHECK: ldaexd [[OLD1:r[0-9]+]], [[OLD2:r[0-9]+]], [r[[ADDR]]]
|
||||
; CHECK: ldaexd [[OLD1:r[0-9]+|lr]], [[OLD2:r[0-9]+|lr]], [r[[ADDR]]]
|
||||
; r0, r1 below is a reasonable guess but could change: it certainly comes into the
|
||||
; function there.
|
||||
; CHECK-ARM: mov [[LOCARRY:r[0-9]+|lr]], #0
|
||||
@ -1146,10 +1146,12 @@ define void @test_atomic_cmpxchg_i64(i64 %wanted, i64 %new) nounwind {
|
||||
; function there.
|
||||
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
|
||||
; CHECK-LE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
|
||||
; CHECK-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
|
||||
; CHECK-ARM-LE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_LO]], [[MISMATCH_HI]]
|
||||
; CHECK-THUMB-LE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_HI]], [[MISMATCH_LO]]
|
||||
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_HI:r[0-9]+|lr]], [[OLD2]], r1
|
||||
; CHECK-BE-DAG: eor{{(\.w)?}} [[MISMATCH_LO:r[0-9]+|lr]], [[OLD1]], r0
|
||||
; CHECK-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
|
||||
; CHECK-ARM-BE: orrs{{(\.w)?}} {{r[0-9]+}}, [[MISMATCH_HI]], [[MISMATCH_LO]]
|
||||
; CHECK-THUMB-BE: orrs{{(\.w)?}} {{(r[0-9]+, )?}}[[MISMATCH_LO]], [[MISMATCH_HI]]
|
||||
; CHECK-NEXT: bne .LBB{{[0-9]+}}_3
|
||||
; CHECK-NEXT: BB#2:
|
||||
; As above, r2, r3 is a reasonable guess.
|
||||
|
@ -92,6 +92,22 @@ entry:
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @extfunc(i32, i32, i32, i32)
|
||||
|
||||
; CHECK-LABEL: Func2:
|
||||
; A8: ldrd
|
||||
; A8: blx
|
||||
; A8: pop
|
||||
define void @Func2(i32* %p) {
|
||||
entry:
|
||||
%addr0 = getelementptr i32, i32* %p, i32 0
|
||||
%addr1 = getelementptr i32, i32* %p, i32 1
|
||||
%v0 = load i32, i32* %addr0
|
||||
%v1 = load i32, i32* %addr1
|
||||
; try to force %v0/%v1 into non-adjacent registers
|
||||
call void @extfunc(i32 %v0, i32 0, i32 0, i32 %v1)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.lifetime.start(i64, i8* nocapture) nounwind
|
||||
declare void @llvm.lifetime.end(i64, i8* nocapture) nounwind
|
||||
|
@ -109,7 +109,7 @@ entry:
|
||||
define double @load_d(double* %a) {
|
||||
entry:
|
||||
; CHECK-LABEL: load_d:
|
||||
; NONE: ldm r0, {r0, r1}
|
||||
; NONE: ldrd r0, r1, [r0]
|
||||
; HARD: vldr d0, [r0]
|
||||
%0 = load double, double* %a, align 8
|
||||
ret double %0
|
||||
@ -127,9 +127,7 @@ entry:
|
||||
define void @store_d(double* %a, double %b) {
|
||||
entry:
|
||||
; CHECK-LABEL: store_d:
|
||||
; NONE: mov r1, r3
|
||||
; NONE: str r2, [r0]
|
||||
; NONE: str r1, [r0, #4]
|
||||
; NONE: strd r2, r3, [r0]
|
||||
; HARD: vstr d0, [r0]
|
||||
store double %b, double* %a, align 8
|
||||
ret void
|
||||
|
Loading…
x
Reference in New Issue
Block a user