mirror of
https://github.com/RPCS3/llvm.git
synced 2025-04-02 13:21:43 +00:00
ARM: Use ldrd/strd to spill 64-bit pairs when available.
This allows common sp-offsets to be part of the instruction and is probably faster on modern CPUs too.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@179977 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
7aad829243
commit
4cc1407b84
@ -747,10 +747,10 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB,
|
||||
Mov->addRegisterKilled(SrcReg, TRI);
|
||||
}
|
||||
|
||||
static const
|
||||
MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB,
|
||||
unsigned Reg, unsigned SubIdx, unsigned State,
|
||||
const TargetRegisterInfo *TRI) {
|
||||
const MachineInstrBuilder &
|
||||
ARMBaseInstrInfo::AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
|
||||
unsigned SubIdx, unsigned State,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
if (!SubIdx)
|
||||
return MIB.addReg(Reg, State);
|
||||
|
||||
@ -795,12 +795,22 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
.addReg(SrcReg, getKillRegState(isKill))
|
||||
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
|
||||
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
|
||||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
|
||||
.addFrameIndex(FI))
|
||||
.addMemOperand(MMO);
|
||||
MIB = AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
|
||||
if (Subtarget.hasV5TEOps()) {
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::STRD));
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
|
||||
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
|
||||
|
||||
AddDefaultPred(MIB);
|
||||
} else {
|
||||
// Fallback to STM instruction, which has existed since the dawn of
|
||||
// time.
|
||||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STMIA))
|
||||
.addFrameIndex(FI).addMemOperand(MMO));
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
|
||||
}
|
||||
} else
|
||||
llvm_unreachable("Unknown reg class!");
|
||||
break;
|
||||
@ -948,7 +958,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
DebugLoc DL;
|
||||
if (I != MBB.end()) DL = I->getDebugLoc();
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
unsigned Align = MFI.getObjectAlignment(FI);
|
||||
MachineMemOperand *MMO =
|
||||
@ -975,12 +984,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
|
||||
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
|
||||
} else if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
|
||||
unsigned LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA : ARM::LDMIA;
|
||||
MachineInstrBuilder MIB =
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(LdmOpc))
|
||||
.addFrameIndex(FI).addMemOperand(MMO));
|
||||
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
|
||||
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
|
||||
MachineInstrBuilder MIB;
|
||||
|
||||
if (Subtarget.hasV5TEOps()) {
|
||||
MIB = BuildMI(MBB, I, DL, get(ARM::LDRD));
|
||||
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
|
||||
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
|
||||
MIB.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO);
|
||||
|
||||
AddDefaultPred(MIB);
|
||||
} else {
|
||||
// Fallback to LDM instruction, which has existed since the dawn of
|
||||
// time.
|
||||
MIB = AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDMIA))
|
||||
.addFrameIndex(FI).addMemOperand(MMO));
|
||||
MIB = AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
|
||||
MIB = AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
|
||||
}
|
||||
|
||||
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
|
||||
MIB.addReg(DestReg, RegState::ImplicitDefine);
|
||||
} else
|
||||
|
@ -141,6 +141,10 @@ public:
|
||||
|
||||
MachineInstr *commuteInstruction(MachineInstr*, bool=false) const;
|
||||
|
||||
const MachineInstrBuilder &AddDReg(MachineInstrBuilder &MIB, unsigned Reg,
|
||||
unsigned SubIdx, unsigned State,
|
||||
const TargetRegisterInfo *TRI) const;
|
||||
|
||||
virtual bool produceSameValue(const MachineInstr *MI0,
|
||||
const MachineInstr *MI1,
|
||||
const MachineRegisterInfo *MRI) const;
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "llvm/CodeGen/MachineFrameInfo.h"
|
||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
|
||||
#include "llvm/CodeGen/MachineMemOperand.h"
|
||||
#include "llvm/CodeGen/MachineRegisterInfo.h"
|
||||
#include "llvm/MC/MCInst.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
|
||||
@ -126,25 +127,41 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
unsigned SrcReg, bool isKill, int FI,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
DebugLoc DL;
|
||||
if (I != MBB.end()) DL = I->getDebugLoc();
|
||||
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
MachineMemOperand *MMO =
|
||||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
|
||||
MachineMemOperand::MOStore,
|
||||
MFI.getObjectSize(FI),
|
||||
MFI.getObjectAlignment(FI));
|
||||
|
||||
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
|
||||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
|
||||
RC == &ARM::GPRnopcRegClass) {
|
||||
DebugLoc DL;
|
||||
if (I != MBB.end()) DL = I->getDebugLoc();
|
||||
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
MachineMemOperand *MMO =
|
||||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
|
||||
MachineMemOperand::MOStore,
|
||||
MFI.getObjectSize(FI),
|
||||
MFI.getObjectAlignment(FI));
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2STRi12))
|
||||
.addReg(SrcReg, getKillRegState(isKill))
|
||||
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
|
||||
return;
|
||||
}
|
||||
|
||||
if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
|
||||
// Thumb2 STRD expects its source registers to be in rGPR. Not a problem for
|
||||
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
|
||||
// otherwise).
|
||||
MachineRegisterInfo *MRI = &MF.getRegInfo();
|
||||
MRI->constrainRegClass(SrcReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2STRDi8));
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_0, getKillRegState(isKill), TRI);
|
||||
AddDReg(MIB, SrcReg, ARM::gsub_1, 0, TRI);
|
||||
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
|
||||
AddDefaultPred(MIB);
|
||||
return;
|
||||
}
|
||||
|
||||
ARMBaseInstrInfo::storeRegToStackSlot(MBB, I, SrcReg, isKill, FI, RC, TRI);
|
||||
}
|
||||
|
||||
@ -153,24 +170,42 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
|
||||
unsigned DestReg, int FI,
|
||||
const TargetRegisterClass *RC,
|
||||
const TargetRegisterInfo *TRI) const {
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
MachineMemOperand *MMO =
|
||||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
|
||||
MachineMemOperand::MOLoad,
|
||||
MFI.getObjectSize(FI),
|
||||
MFI.getObjectAlignment(FI));
|
||||
DebugLoc DL;
|
||||
if (I != MBB.end()) DL = I->getDebugLoc();
|
||||
|
||||
if (RC == &ARM::GPRRegClass || RC == &ARM::tGPRRegClass ||
|
||||
RC == &ARM::tcGPRRegClass || RC == &ARM::rGPRRegClass ||
|
||||
RC == &ARM::GPRnopcRegClass) {
|
||||
DebugLoc DL;
|
||||
if (I != MBB.end()) DL = I->getDebugLoc();
|
||||
|
||||
MachineFunction &MF = *MBB.getParent();
|
||||
MachineFrameInfo &MFI = *MF.getFrameInfo();
|
||||
MachineMemOperand *MMO =
|
||||
MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI),
|
||||
MachineMemOperand::MOLoad,
|
||||
MFI.getObjectSize(FI),
|
||||
MFI.getObjectAlignment(FI));
|
||||
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2LDRi12), DestReg)
|
||||
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
|
||||
return;
|
||||
}
|
||||
|
||||
if (ARM::GPRPairRegClass.hasSubClassEq(RC)) {
|
||||
// Thumb2 LDRD expects its dest-registers to be in rGPR. Not a problem for
|
||||
// gsub_0, but needs an extra constraint for gsub_1 (which could be sp
|
||||
// otherwise).
|
||||
MachineRegisterInfo *MRI = &MF.getRegInfo();
|
||||
MRI->constrainRegClass(DestReg, &ARM::GPRPair_with_gsub_1_in_rGPRRegClass);
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, I, DL, get(ARM::t2LDRDi8));
|
||||
AddDReg(MIB, DestReg, ARM::gsub_0, RegState::DefineNoRead, TRI);
|
||||
AddDReg(MIB, DestReg, ARM::gsub_1, RegState::DefineNoRead, TRI);
|
||||
MIB.addFrameIndex(FI).addImm(0).addMemOperand(MMO);
|
||||
AddDefaultPred(MIB);
|
||||
|
||||
if (TargetRegisterInfo::isPhysicalRegister(DestReg))
|
||||
MIB.addReg(DestReg, RegState::ImplicitDefine);
|
||||
return;
|
||||
}
|
||||
|
||||
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC, TRI);
|
||||
}
|
||||
|
||||
@ -514,6 +549,15 @@ bool llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
Offset = -Offset;
|
||||
isSub = true;
|
||||
}
|
||||
} else if (AddrMode == ARMII::AddrModeT2_i8s4) {
|
||||
Offset += MI.getOperand(FrameRegIdx + 1).getImm() * 4;
|
||||
NumBits = 8;
|
||||
// MCInst operand has already scaled value.
|
||||
Scale = 1;
|
||||
if (Offset < 0) {
|
||||
isSub = true;
|
||||
Offset = -Offset;
|
||||
}
|
||||
} else {
|
||||
llvm_unreachable("Unsupported addressing mode!");
|
||||
}
|
||||
|
@ -1,29 +1,43 @@
|
||||
; RUN: llc -mtriple=armv7-none-linux-gnueabi < %s | FileCheck %s
|
||||
; RUN: llc -mtriple=armv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
|
||||
; RUN: llc -mtriple=armv4-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITHOUT-LDRD
|
||||
; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -verify-machineinstrs < %s | FileCheck %s --check-prefix=CHECK-WITH-LDRD
|
||||
; RUN: llc -mtriple=thumbv7-none-linux-gnueabi -debug -o /dev/null < %s 2>&1 | FileCheck %s --check-prefix=INSTRS-ARE-THUMB
|
||||
|
||||
define void @foo(i64* %addr) {
|
||||
%val1 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val2 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val3 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val4 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val5 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val6 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val7 = tail call i64 asm sideeffect "ldrd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val1 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val2 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val3 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val4 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val5 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val6 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
%val7 = tail call i64 asm sideeffect "ldrexd $0, ${0:H}, [r0]", "=&r,r"(i64* %addr)
|
||||
|
||||
; Key point is that enough 64-bit paired GPR values are live that
|
||||
; one of them has to be spilled. This used to cause an abort because
|
||||
; an LDMIA was created with both a FrameIndex and an offset, which
|
||||
; is not allowed.
|
||||
|
||||
; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
|
||||
; CHECK-WITH-LDRD: strd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
|
||||
|
||||
; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp, #8]
|
||||
; CHECK-WITH-LDRD: ldrd {{r[0-9]+}}, {{r[0-9]+}}, [sp]
|
||||
|
||||
; We also want to ensure the register scavenger is working (i.e. an
|
||||
; offset from sp can be generated), so we need two spills.
|
||||
; CHECK: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
|
||||
; CHECK: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
; CHECK: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
; CHECK-WITHOUT-LDRD: add [[ADDRREG:[a-z0-9]+]], sp, #{{[0-9]+}}
|
||||
; CHECK-WITHOUT-LDRD: stm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
; CHECK-WITHOUT-LDRD: stm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
|
||||
; In principle LLVM may have to recalculate the offset. At the moment
|
||||
; it reuses the original though.
|
||||
; CHECK: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
; CHECK: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
; CHECK-WITHOUT-LDRD: ldm [[ADDRREG]], {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
; CHECK-WITHOUT-LDRD: ldm sp, {r{{[0-9]+}}, r{{[0-9]+}}}
|
||||
|
||||
; Make sure we are actually creating the Thumb versions of the spill
|
||||
; instructions.
|
||||
; INSTRS-ARE-THUMB: t2STRDi8
|
||||
; INSTRS-ARE-THUMB: t2LDRDi8
|
||||
|
||||
store volatile i64 %val1, i64* %addr
|
||||
store volatile i64 %val2, i64* %addr
|
||||
|
Loading…
x
Reference in New Issue
Block a user