From 3219c7fbe5d8d38a3f4a4151727a84f514a7957b Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Mon, 15 Jun 2009 08:28:29 +0000 Subject: [PATCH] Part 1. - Change register allocation hint to a pair of unsigned integers. The hint type is zero (which means prefer the register specified as second part of the pair) or entirely target dependent. - Allow targets to specify alternative register allocation orders based on allocation hint. Part 2. - Use the register allocation hint system to implement more aggressive load / store multiple formation. - Aggressively form LDRD / STRD. These are formed *before* register allocation. It has to be done this way to shorten live interval of base and offset registers. e.g. v1025 = LDR v1024, 0 v1026 = LDR v1024, 0 => v1025,v1026 = LDRD v1024, 0 If this transformation isn't done before allocation, v1024 will overlap v1025 which means it more difficult to allocate a register pair. - Even with the register allocation hint, it may not be possible to get the desired allocation. In that case, the post-allocation load / store multiple pass must fix the ldrd / strd instructions. They can either become ldm / stm instructions or back to a pair of ldr / str instructions. This is work in progress, not yet enabled. llvm-svn: 73381 --- include/llvm/CodeGen/MachineRegisterInfo.h | 26 +- include/llvm/Target/TargetRegisterInfo.h | 33 ++- lib/CodeGen/LiveInterval.cpp | 12 +- lib/CodeGen/MachineRegisterInfo.cpp | 2 +- lib/CodeGen/RegAllocLinearScan.cpp | 25 +- lib/CodeGen/SimpleRegisterCoalescing.cpp | 47 +--- lib/CodeGen/VirtRegMap.cpp | 39 +-- lib/Target/ARM/ARMCodeEmitter.cpp | 4 + lib/Target/ARM/ARMInstrInfo.td | 10 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 203 +++++++++++++- lib/Target/ARM/ARMRegisterInfo.cpp | 305 ++++++++++++++++++++- lib/Target/ARM/ARMRegisterInfo.h | 34 ++- test/CodeGen/ARM/ldrd.ll | 14 + 13 files changed, 613 insertions(+), 141 deletions(-) create mode 100644 test/CodeGen/ARM/ldrd.ll diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index c343d42a92e..80c37b39ca0 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -25,16 +25,6 @@ namespace llvm { /// registers, including vreg register classes, use/def chains for registers, /// etc. class MachineRegisterInfo { -public: - /// Register allocation hints. - enum RegAllocHintType { - RA_None, /// No preference - RA_Preference, /// Prefer a particular register - RA_PairEven, /// Even register of a register pair - RA_PairOdd /// Odd register of a register pair - }; - -private: /// VRegInfo - Information we keep for each virtual register. The entries in /// this vector are actually converted to vreg numbers by adding the /// TargetRegisterInfo::FirstVirtualRegister delta to their index. @@ -49,12 +39,13 @@ private: std::vector > RegClass2VRegMap; /// RegAllocHints - This vector records register allocation hints for virtual - /// registers. For each virtual register, it keeps a register and type enum - /// pair making up the allocation hint. For example, if the hint type is - /// RA_Specified, it means the virtual register prefers the specified physical - /// register of the hint or the physical register allocated to the virtual + /// registers. For each virtual register, it keeps a register and hint type + /// pair making up the allocation hint. Hint type is target specific except + /// for the value 0 which means the second value of the pair is the preferred + /// register for allocation. For example, if the hint is <0, 1024>, it means + /// the allocator should prefer the physical register allocated to the virtual /// register of the hint. - std::vector > RegAllocHints; + std::vector > RegAllocHints; /// PhysRegUseDefLists - This is an array of the head of the use/def list for /// physical registers. @@ -191,8 +182,7 @@ public: /// setRegAllocationHint - Specify a register allocation hint for the /// specified virtual register. - void setRegAllocationHint(unsigned Reg, - RegAllocHintType Type, unsigned PrefReg) { + void setRegAllocationHint(unsigned Reg, unsigned Type, unsigned PrefReg) { Reg -= TargetRegisterInfo::FirstVirtualRegister; assert(Reg < VRegInfo.size() && "Invalid vreg!"); RegAllocHints[Reg].first = Type; @@ -201,7 +191,7 @@ public: /// getRegAllocationHint - Return the register allocation hint for the /// specified virtual register. - std::pair + std::pair getRegAllocationHint(unsigned Reg) const { Reg -= TargetRegisterInfo::FirstVirtualRegister; assert(Reg < VRegInfo.size() && "Invalid vreg!"); diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index 949185ac261..9da1134352b 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -484,20 +484,6 @@ public: return 0; } - /// getRegisterPairEven - Return the even register of the register pair that - /// contains the specified register. - virtual unsigned getRegisterPairEven(const MachineFunction &MF, - unsigned Reg) const { - return 0; - } - - /// getRegisterPairOdd - Return the odd register of the register pair that - /// contains the specified register. - virtual unsigned getRegisterPairOdd(const MachineFunction &MF, - unsigned Reg) const { - return 0; - } - //===--------------------------------------------------------------------===// // Register Class Information // @@ -533,6 +519,25 @@ public: return NULL; } + /// getAllocationOrder - Returns the register allocation order for a specified + /// register class in the form of a pair of TargetRegisterClass iterators. + virtual std::pair + getAllocationOrder(const TargetRegisterClass *RC, + std::pair Hint, + const MachineFunction &MF) const { + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); + } + + /// ResolveRegAllocHint - Resolves the specified register allocation hint + /// to a physical register. Returns the physical register if it is successful. + unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { + if (Type == 0 && Reg && isPhysicalRegister(Reg)) + return Reg; + return 0; + } + /// targetHandlesStackFrameRounding - Returns true if the target is /// responsible for rounding up the stack frame (probably at emitPrologue /// time). diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 97926dd6fd5..4d3ce3a9d43 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -507,12 +507,11 @@ void LiveInterval::join(LiveInterval &Other, const int *LHSValNoAssignments, // Update regalloc hint if currently there isn't one. if (TargetRegisterInfo::isVirtualRegister(reg) && TargetRegisterInfo::isVirtualRegister(Other.reg)) { - std::pair Hint = - MRI->getRegAllocationHint(reg); - if (Hint.first == MachineRegisterInfo::RA_None) { - std::pair OtherHint = + std::pair Hint = MRI->getRegAllocationHint(reg); + if (Hint.first == 0 && Hint.second == 0) { + std::pair OtherHint = MRI->getRegAllocationHint(Other.reg); - if (OtherHint.first != MachineRegisterInfo::RA_None) + if (OtherHint.first || OtherHint.second) MRI->setRegAllocationHint(reg, OtherHint.first, OtherHint.second); } } @@ -772,8 +771,7 @@ void LiveInterval::Copy(const LiveInterval &RHS, BumpPtrAllocator &VNInfoAllocator) { ranges.clear(); valnos.clear(); - std::pair Hint = - MRI->getRegAllocationHint(RHS.reg); + std::pair Hint = MRI->getRegAllocationHint(RHS.reg); MRI->setRegAllocationHint(reg, Hint.first, Hint.second); weight = RHS.weight; diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 9649c4c44ce..544d83a33f7 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -65,7 +65,7 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ // Add a reg, but keep track of whether the vector reallocated or not. void *ArrayBase = VRegInfo.empty() ? 0 : &VRegInfo[0]; VRegInfo.push_back(std::make_pair(RegClass, (MachineOperand*)0)); - RegAllocHints.push_back(std::make_pair(RA_None, 0)); + RegAllocHints.push_back(std::make_pair(0, 0)); if (!((&VRegInfo[0] == ArrayBase || VRegInfo.size() == 1))) // The vector reallocated, handle this now. diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp index 3c445e7942d..ec956c2c17c 100644 --- a/lib/CodeGen/RegAllocLinearScan.cpp +++ b/lib/CodeGen/RegAllocLinearScan.cpp @@ -281,7 +281,8 @@ namespace { /// getFreePhysReg - return a free physical register for this virtual /// register interval if we have one, otherwise return 0. unsigned getFreePhysReg(LiveInterval* cur); - unsigned getFreePhysReg(const TargetRegisterClass *RC, + unsigned getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, unsigned MaxInactiveCount, SmallVector &inactiveCounts, bool SkipDGRegs); @@ -936,8 +937,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) if (DstSubReg) Reg = tri_->getMatchingSuperReg(Reg, DstSubReg, RC); if (Reg && allocatableRegs_[Reg] && RC->contains(Reg)) - mri_->setRegAllocationHint(cur->reg, - MachineRegisterInfo::RA_Preference, Reg); + mri_->setRegAllocationHint(cur->reg, 0, Reg); } } } @@ -1046,8 +1046,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) if (LiveInterval *NextReloadLI = hasNextReloadInterval(cur)) { // "Downgrade" physReg to try to keep physReg from being allocated until // the next reload from the same SS is allocated. - mri_->setRegAllocationHint(NextReloadLI->reg, - MachineRegisterInfo::RA_Preference, physReg); + mri_->setRegAllocationHint(NextReloadLI->reg, 0, physReg); DowngradeRegister(cur, physReg); } return; @@ -1293,7 +1292,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) // It interval has a preference, it must be defined by a copy. Clear the // preference now since the source interval allocation may have been // undone as well. - mri_->setRegAllocationHint(i->reg, MachineRegisterInfo::RA_None, 0); + mri_->setRegAllocationHint(i->reg, 0, 0); else { UpgradeRegister(ii->second); } @@ -1349,15 +1348,17 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) } } -unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC, +unsigned RALinScan::getFreePhysReg(LiveInterval* cur, + const TargetRegisterClass *RC, unsigned MaxInactiveCount, SmallVector &inactiveCounts, bool SkipDGRegs) { unsigned FreeReg = 0; unsigned FreeRegInactiveCount = 0; - TargetRegisterClass::iterator I = RC->allocation_order_begin(*mf_); - TargetRegisterClass::iterator E = RC->allocation_order_end(*mf_); + TargetRegisterClass::iterator I, E; + tie(I, E) = tri_->getAllocationOrder(RC, + mri_->getRegAllocationHint(cur->reg), *mf_); assert(I != E && "No allocatable register in this register class!"); // Scan for the first available register. @@ -1380,7 +1381,7 @@ unsigned RALinScan::getFreePhysReg(const TargetRegisterClass *RC, // return this register. if (FreeReg == 0 || FreeRegInactiveCount == MaxInactiveCount) return FreeReg; - + // Continue scanning the registers, looking for the one with the highest // inactive count. Alkis found that this reduced register pressure very // slightly on X86 (in rev 1.94 of this file), though this should probably be @@ -1440,12 +1441,12 @@ unsigned RALinScan::getFreePhysReg(LiveInterval *cur) { } if (!DowngradedRegs.empty()) { - unsigned FreeReg = getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, + unsigned FreeReg = getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, true); if (FreeReg) return FreeReg; } - return getFreePhysReg(RC, MaxInactiveCount, inactiveCounts, false); + return getFreePhysReg(cur, RC, MaxInactiveCount, inactiveCounts, false); } FunctionPass* llvm::createLinearScanRegisterAllocator() { diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp index 95e5acaab41..7d9170a2fe4 100644 --- a/lib/CodeGen/SimpleRegisterCoalescing.cpp +++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp @@ -1272,26 +1272,8 @@ static unsigned getRegAllocPreference(unsigned Reg, MachineFunction &MF, const TargetRegisterInfo *TRI) { if (TargetRegisterInfo::isPhysicalRegister(Reg)) return 0; - - std::pair Hint = - MRI->getRegAllocationHint(Reg); - switch (Hint.first) { - default: assert(0); - case MachineRegisterInfo::RA_None: - return 0; - case MachineRegisterInfo::RA_Preference: - return Hint.second; - case MachineRegisterInfo::RA_PairEven: - if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) - return TRI->getRegisterPairOdd(MF, Hint.second); - return Hint.second; - case MachineRegisterInfo::RA_PairOdd: - if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) - return TRI->getRegisterPairEven(MF, Hint.second); - return Hint.second; - } - // Shouldn't reach here. - return 0; + std::pair Hint = MRI->getRegAllocationHint(Reg); + return TRI->ResolveRegAllocHint(Hint.first, Hint.second, MF); } /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, @@ -1595,8 +1577,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (PhysJoinTweak) { if (SrcIsPhys) { if (!isWinToJoinVRWithSrcPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - mri_->setRegAllocationHint(DstInt.reg, - MachineRegisterInfo::RA_Preference, SrcReg); + mri_->setRegAllocationHint(DstInt.reg, 0, SrcReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1604,8 +1585,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { } } else { if (!isWinToJoinVRWithDstPhysReg(CopyMI, CopyMBB, DstInt, SrcInt)) { - mri_->setRegAllocationHint(SrcInt.reg, - MachineRegisterInfo::RA_Preference, DstReg); + mri_->setRegAllocationHint(SrcInt.reg, 0, DstReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1629,8 +1609,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { if (Length > Threshold && (((float)std::distance(mri_->use_begin(JoinVReg), mri_->use_end()) / Length) < Ratio)) { - mri_->setRegAllocationHint(JoinVInt.reg, - MachineRegisterInfo::RA_Preference, JoinPReg); + mri_->setRegAllocationHint(JoinVInt.reg, 0, JoinPReg); ++numAborts; DOUT << "\tMay tie down a physical register, abort!\n"; Again = true; // May be possible to coalesce later. @@ -1815,8 +1794,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) { TargetRegisterInfo::isVirtualRegister(ResDstInt->reg)) { const TargetRegisterClass *RC = mri_->getRegClass(ResDstInt->reg); if (!RC->contains(Preference)) - mri_->setRegAllocationHint(ResDstInt->reg, - MachineRegisterInfo::RA_None, 0); + mri_->setRegAllocationHint(ResDstInt->reg, 0, 0); } DOUT << "\n\t\tJoined. Result = "; ResDstInt->print(DOUT, tri_); @@ -2067,12 +2045,9 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){ // Update regalloc hint if both are virtual registers. if (TargetRegisterInfo::isVirtualRegister(LHS.reg) && TargetRegisterInfo::isVirtualRegister(RHS.reg)) { - std::pair RHSPref = - mri_->getRegAllocationHint(RHS.reg); - std::pair LHSPref = - mri_->getRegAllocationHint(LHS.reg); - if (RHSPref.first != MachineRegisterInfo::RA_None && - LHSPref.first == MachineRegisterInfo::RA_None) + std::pair RHSPref = mri_->getRegAllocationHint(RHS.reg); + std::pair LHSPref = mri_->getRegAllocationHint(LHS.reg); + if (RHSPref != LHSPref) mri_->setRegAllocationHint(LHS.reg, RHSPref.first, RHSPref.second); } @@ -2846,8 +2821,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) { } // Slightly prefer live interval that has been assigned a preferred reg. - if (mri_->getRegAllocationHint(LI.reg).first != - MachineRegisterInfo::RA_None) + std::pair Hint = mri_->getRegAllocationHint(LI.reg); + if (Hint.first || Hint.second) LI.weight *= 1.01F; // Divide the weight of the interval by its size. This encourages diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index 87d75dd006d..4d3417fdff5 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -100,36 +100,15 @@ void VirtRegMap::grow() { } unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { - std::pair Hint = - MRI->getRegAllocationHint(virtReg); - switch (Hint.first) { - default: assert(0); - case MachineRegisterInfo::RA_None: - return 0; - case MachineRegisterInfo::RA_Preference: - if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) - return Hint.second; - if (hasPhys(Hint.second)) - return getPhys(Hint.second); - case MachineRegisterInfo::RA_PairEven: { - unsigned physReg = Hint.second; - if (TargetRegisterInfo::isPhysicalRegister(physReg)) - return TRI->getRegisterPairEven(*MF, physReg); - else if (hasPhys(physReg)) - return TRI->getRegisterPairEven(*MF, getPhys(physReg)); - return 0; - } - case MachineRegisterInfo::RA_PairOdd: { - unsigned physReg = Hint.second; - if (TargetRegisterInfo::isPhysicalRegister(physReg)) - return TRI->getRegisterPairOdd(*MF, physReg); - else if (hasPhys(physReg)) - return TRI->getRegisterPairOdd(*MF, getPhys(physReg)); - return 0; - } - } - // Shouldn't reach here. - return 0; + std::pair Hint = MRI->getRegAllocationHint(virtReg); + unsigned physReg = Hint.second; + if (physReg && + TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) + physReg = getPhys(physReg); + if (Hint.first == 0) + return (physReg && TargetRegisterInfo::isPhysicalRegister(physReg)) + ? physReg : 0; + return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); } int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp index 44fac12019b..f6629fe3c87 100644 --- a/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/lib/Target/ARM/ARMCodeEmitter.cpp @@ -900,6 +900,10 @@ void Emitter::emitMiscLoadStoreInstruction(const MachineInstr &MI, // Set first operand Binary |= getMachineOpValue(MI, OpIdx++) << ARMII::RegRdShift; + // Skip LDRD and STRD's second operand. + if (TID.Opcode == ARM::LDRD || TID.Opcode == ARM::STRD) + ++OpIdx; + // Set second operand if (ImplicitRn) // Special handling for implicit use (e.g. PC). diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index cc9f1a5759d..d089f03b578 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -647,9 +647,8 @@ def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, let mayLoad = 1 in { // Load doubleword -def LDRD : AI3ldd<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm, - "ldr", "d $dst, $addr", - []>, Requires<[IsARM, HasV5T]>; +def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm, + "ldr", "d $dst1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed loads def LDR_PRE : AI2ldwpr<(outs GPR:$dst, GPR:$base_wb), @@ -709,9 +708,8 @@ def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, // Store doubleword let mayStore = 1 in -def STRD : AI3std<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, - "str", "d $src, $addr", - []>, Requires<[IsARM, HasV5T]>; +def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),StMiscFrm, + "str", "d $src1, $addr", []>, Requires<[IsARM, HasV5T]>; // Indexed stores def STR_PRE : AI2stwpr<(outs GPR:$base_wb), diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index 684ecb4ce79..cf460b3eb21 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -17,15 +17,17 @@ #include "ARMAddressingModes.h" #include "ARMMachineFunctionInfo.h" #include "ARMRegisterInfo.h" +#include "llvm/DerivedTypes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetData.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Compiler.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -82,6 +84,8 @@ namespace { SmallVector &Merges); void AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps); + bool FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI); bool LoadStoreMultipleOpti(MachineBasicBlock &MBB); bool MergeReturnIntoLDM(MachineBasicBlock &MBB); }; @@ -586,13 +590,19 @@ void ARMLoadStoreOpt::AdvanceRS(MachineBasicBlock &MBB, MemOpQueue &MemOps) { static int getMemoryOpOffset(const MachineInstr *MI) { int Opcode = MI->getOpcode(); bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR; + bool isAM3 = Opcode == ARM::LDRD || Opcode == ARM::STRD; unsigned NumOperands = MI->getDesc().getNumOperands(); unsigned OffField = MI->getOperand(NumOperands-3).getImm(); int Offset = isAM2 - ? ARM_AM::getAM2Offset(OffField) : ARM_AM::getAM5Offset(OffField) * 4; + ? ARM_AM::getAM2Offset(OffField) + : (isAM3 ? ARM_AM::getAM3Offset(OffField) + : ARM_AM::getAM5Offset(OffField) * 4); if (isAM2) { if (ARM_AM::getAM2Op(OffField) == ARM_AM::sub) Offset = -Offset; + } else if (isAM3) { + if (ARM_AM::getAM3Op(OffField) == ARM_AM::sub) + Offset = -Offset; } else { if (ARM_AM::getAM5Op(OffField) == ARM_AM::sub) Offset = -Offset; @@ -600,6 +610,100 @@ static int getMemoryOpOffset(const MachineInstr *MI) { return Offset; } +static void InsertLDR_STR(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, + int OffImm, bool isDef, + DebugLoc dl, unsigned NewOpc, + unsigned Reg, bool RegKill, + unsigned BaseReg, bool BaseKill, + unsigned OffReg, bool OffKill, + ARMCC::CondCodes Pred, unsigned PredReg, + const TargetInstrInfo *TII) { + unsigned Offset; + if (OffImm < 0) + Offset = ARM_AM::getAM2Opc(ARM_AM::sub, -OffImm, ARM_AM::no_shift); + else + Offset = ARM_AM::getAM2Opc(ARM_AM::add, OffImm, ARM_AM::no_shift); + if (isDef) + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc), Reg) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); + else + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(Reg, getKillRegState(RegKill)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addReg(OffReg, getKillRegState(OffKill)) + .addImm(Offset) + .addImm(Pred).addReg(PredReg); +} + +bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = &*MBBI; + unsigned Opcode = MI->getOpcode(); + if (Opcode == ARM::LDRD || Opcode == ARM::STRD) { + unsigned EvenReg = MI->getOperand(0).getReg(); + unsigned OddReg = MI->getOperand(1).getReg(); + unsigned EvenRegNum = TRI->getDwarfRegNum(EvenReg, false); + unsigned OddRegNum = TRI->getDwarfRegNum(OddReg, false); + if ((EvenRegNum & 1) == 0 && (EvenRegNum + 1) == OddRegNum) + return false; + + bool isDef = Opcode == ARM::LDRD; + bool EvenKill = isDef ? false : MI->getOperand(0).isKill(); + bool OddKill = isDef ? false : MI->getOperand(1).isKill(); + const MachineOperand &BaseOp = MI->getOperand(2); + unsigned BaseReg = BaseOp.getReg(); + bool BaseKill = BaseOp.isKill(); + const MachineOperand &OffOp = MI->getOperand(3); + unsigned OffReg = OffOp.getReg(); + bool OffKill = OffOp.isKill(); + int OffImm = getMemoryOpOffset(MI); + unsigned PredReg = 0; + ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg); + + if (OddRegNum > EvenRegNum && OffReg == 0 && OffImm == 0) { + // Ascending register numbers and no offset. It's safe to change it to a + // ldm or stm. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDM : ARM::STM; + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(NewOpc)) + .addReg(BaseReg, getKillRegState(BaseKill)) + .addImm(ARM_AM::getAM4ModeImm(ARM_AM::ia)) + .addImm(Pred).addReg(PredReg) + .addReg(EvenReg, getDefRegState(isDef)) + .addReg(OddReg, getDefRegState(isDef)); + } else { + // Split into two instructions. + unsigned NewOpc = (Opcode == ARM::LDRD) ? ARM::LDR : ARM::STR; + DebugLoc dl = MBBI->getDebugLoc(); + // If this is a load and base register is killed, it may have been + // re-defed by the load, make sure the first load does not clobber it. + if (isDef && + (BaseKill || OffKill) && + (TRI->regsOverlap(EvenReg, BaseReg) || + (OffReg && TRI->regsOverlap(EvenReg, OffReg)))) { + assert(!TRI->regsOverlap(OddReg, BaseReg) && + (!OffReg || !TRI->regsOverlap(OddReg, OffReg))); + InsertLDR_STR(MBB, MBBI, OffImm+4, isDef, dl, NewOpc, OddReg, OddKill, + BaseReg, false, OffReg, false, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm, isDef, dl, NewOpc, EvenReg, EvenKill, + BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); + } else { + InsertLDR_STR(MBB, MBBI, OffImm, isDef, dl, NewOpc, EvenReg, EvenKill, + BaseReg, false, OffReg, false, Pred, PredReg, TII); + InsertLDR_STR(MBB, MBBI, OffImm+4, isDef, dl, NewOpc, OddReg, OddKill, + BaseReg, BaseKill, OffReg, OffKill, Pred, PredReg, TII); + } + } + + MBBI = prior(MBBI); + MBB.erase(MI); + } + return false; +} + /// LoadStoreMultipleOpti - An optimization pass to turn multiple LDR / STR /// ops of the same base and incrementing offset into LDM / STM ops. bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { @@ -617,6 +721,9 @@ bool ARMLoadStoreOpt::LoadStoreMultipleOpti(MachineBasicBlock &MBB) { RS->enterBasicBlock(&MBB); MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); while (MBBI != E) { + if (FixInvalidRegPairOp(MBB, MBBI)) + continue; + bool Advance = false; bool TryMerge = false; bool Clobber = false; @@ -817,8 +924,10 @@ namespace { static char ID; ARMPreAllocLoadStoreOpt() : MachineFunctionPass(&ID) {} + const TargetData *TD; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const ARMSubtarget *STI; MachineRegisterInfo *MRI; virtual bool runOnMachineFunction(MachineFunction &Fn); @@ -828,6 +937,7 @@ namespace { } private: + bool SatisfyLdStDWordlignment(MachineInstr *MI); bool RescheduleOps(MachineBasicBlock *MBB, SmallVector &Ops, unsigned Base, bool isLd, @@ -838,8 +948,10 @@ namespace { } bool ARMPreAllocLoadStoreOpt::runOnMachineFunction(MachineFunction &Fn) { + TD = Fn.getTarget().getTargetData(); TII = Fn.getTarget().getInstrInfo(); TRI = Fn.getTarget().getRegisterInfo(); + STI = &Fn.getTarget().getSubtarget(); MRI = &Fn.getRegInfo(); bool Modified = false; @@ -883,6 +995,18 @@ static bool IsSafeToMove(bool isLd, unsigned Base, return true; } +bool ARMPreAllocLoadStoreOpt::SatisfyLdStDWordlignment(MachineInstr *MI) { + if (!MI->hasOneMemOperand() || + !MI->memoperands_begin()->getValue() || + MI->memoperands_begin()->isVolatile()) + return false; + + unsigned Align = MI->memoperands_begin()->getAlignment(); + unsigned ReqAlign = STI->hasV6Ops() + ? TD->getPrefTypeAlignment(Type::Int64Ty) : 8; // Pre-v6 need 8-byte align + return Align >= ReqAlign; +} + bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, SmallVector &Ops, unsigned Base, bool isLd, @@ -948,10 +1072,77 @@ bool ARMPreAllocLoadStoreOpt::RescheduleOps(MachineBasicBlock *MBB, MachineBasicBlock::iterator InsertPos = isLd ? FirstOp : LastOp; while (InsertPos != MBB->end() && MoveOps.count(InsertPos)) ++InsertPos; - for (unsigned i = 0; i != NumMove; ++i) { - MachineInstr *Op = Ops.back(); - Ops.pop_back(); - MBB->splice(InsertPos, MBB, Op); + + // If we are moving a pair of loads / stores, see if it makes sense + // to try to allocate a pair of registers that can form register pairs. + unsigned PairOpcode = 0; + unsigned Offset = 0; + + // Make sure the alignment requirement is met. + if (NumMove == 2 && SatisfyLdStDWordlignment(Ops.back())) { + int Opcode = Ops.back()->getOpcode(); + // FIXME: FLDS / FSTS -> FLDD / FSTD + if (Opcode == ARM::LDR) + PairOpcode = ARM::LDRD; + else if (Opcode == ARM::STR) + PairOpcode = ARM::STRD; + } + // Then make sure the immediate offset fits. + if (PairOpcode) { + int OffImm = getMemoryOpOffset(Ops.back()); + ARM_AM::AddrOpc AddSub = ARM_AM::add; + if (OffImm < 0) { + AddSub = ARM_AM::sub; + OffImm = - OffImm; + } + if (OffImm >= 256) // 8 bits + PairOpcode = 0; + else + Offset = ARM_AM::getAM3Opc(AddSub, OffImm); + } + + if (!PairOpcode) { + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + MBB->splice(InsertPos, MBB, Op); + } + } else { + // Form the pair instruction instead. + unsigned EvenReg = 0, OddReg = 0; + unsigned BaseReg = 0, OffReg = 0, PredReg = 0; + ARMCC::CondCodes Pred; + DebugLoc dl; + for (unsigned i = 0; i != NumMove; ++i) { + MachineInstr *Op = Ops.back(); + Ops.pop_back(); + unsigned Reg = Op->getOperand(0).getReg(); + if (i == 0) { + EvenReg = Reg; + BaseReg = Op->getOperand(1).getReg(); + OffReg = Op->getOperand(2).getReg(); + Pred = getInstrPredicate(Op, PredReg); + dl = Op->getDebugLoc(); + } else + OddReg = Reg; + MBB->erase(Op); + } + if (isLd) + BuildMI(*MBB, InsertPos, dl, TII->get(PairOpcode)) + .addReg(EvenReg, RegState::Define) + .addReg(OddReg, RegState::Define) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + else + BuildMI(*MBB, InsertPos, dl, TII->get(PairOpcode)) + .addReg(EvenReg) + .addReg(OddReg) + .addReg(BaseReg).addReg(0).addImm(Offset) + .addImm(Pred).addReg(PredReg); + + // Add register allocation hints to form register pairs. + MRI->setRegAllocationHint(EvenReg, ARMRI::RegPairEven, OddReg); + MRI->setRegAllocationHint(OddReg, ARMRI::RegPairOdd, EvenReg); } NumLdStMoved += NumMove; diff --git a/lib/Target/ARM/ARMRegisterInfo.cpp b/lib/Target/ARM/ARMRegisterInfo.cpp index 199858f525c..d06da67a5f8 100644 --- a/lib/Target/ARM/ARMRegisterInfo.cpp +++ b/lib/Target/ARM/ARMRegisterInfo.cpp @@ -194,10 +194,6 @@ void ARMRegisterInfo::emitLoadConstPool(MachineBasicBlock &MBB, .addReg(0).addImm(0).addImm(Pred).addReg(PredReg); } -const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { - return &ARM::GPRRegClass; -} - /// isLowRegister - Returns true if the register is low register r0-r7. /// bool ARMRegisterInfo::isLowRegister(unsigned Reg) const { @@ -304,6 +300,159 @@ ARMRegisterInfo::isReservedReg(const MachineFunction &MF, unsigned Reg) const { return false; } +const TargetRegisterClass *ARMRegisterInfo::getPointerRegClass() const { + return &ARM::GPRRegClass; +} + +/// getAllocationOrder - Returns the register allocation order for a specified +/// register class in the form of a pair of TargetRegisterClass iterators. +std::pair +ARMRegisterInfo::getAllocationOrder(const TargetRegisterClass *RC, + std::pair Hint, + const MachineFunction &MF) const { + // Alternative register allocation orders when favoring even / odd registers + // of register pairs. + + // No FP, R9 is available. + static const unsigned GPREven1[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd1[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R7, R9 is available. + static const unsigned GPREven2[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R8, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, + ARM::R9, ARM::R11 + }; + static const unsigned GPROdd2[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R9, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, + ARM::R8, ARM::R10 + }; + + // FP is R11, R9 is available. + static const unsigned GPREven3[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R8, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, + ARM::R9 + }; + static const unsigned GPROdd3[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R6, ARM::R9, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R7, + ARM::R8 + }; + + // No FP, R9 is not available. + static const unsigned GPREven4[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd4[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R7, R9 is not available. + static const unsigned GPREven5[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R10, + ARM::R1, ARM::R3, ARM::R12,ARM::LR, ARM::R5, ARM::R6, ARM::R8, + ARM::R11 + }; + static const unsigned GPROdd5[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R11, + ARM::R0, ARM::R2, ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8, + ARM::R10 + }; + + // FP is R11, R9 is not available. + static const unsigned GPREven6[] = { + ARM::R0, ARM::R2, ARM::R4, ARM::R6, + ARM::R1, ARM::R3, ARM::R10,ARM::R12,ARM::LR, ARM::R5, ARM::R7, ARM::R8 + }; + static const unsigned GPROdd6[] = { + ARM::R1, ARM::R3, ARM::R5, ARM::R7, + ARM::R0, ARM::R2, ARM::R10,ARM::R12,ARM::LR, ARM::R4, ARM::R6, ARM::R8 + }; + + + if (Hint.first == ARMRI::RegPairEven) { + if (!hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven1, + GPREven1 + (sizeof(GPREven1)/sizeof(unsigned))); + else + return std::make_pair(GPREven4, + GPREven4 + (sizeof(GPREven4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPREven2, + GPREven2 + (sizeof(GPREven2)/sizeof(unsigned))); + else + return std::make_pair(GPREven5, + GPREven5 + (sizeof(GPREven5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPREven3, + GPREven3 + (sizeof(GPREven3)/sizeof(unsigned))); + else + return std::make_pair(GPREven6, + GPREven6 + (sizeof(GPREven6)/sizeof(unsigned))); + } + } else if (Hint.first == ARMRI::RegPairOdd) { + if (!hasFP(MF)) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd1, + GPROdd1 + (sizeof(GPROdd1)/sizeof(unsigned))); + else + return std::make_pair(GPROdd4, + GPROdd4 + (sizeof(GPROdd4)/sizeof(unsigned))); + } else if (FramePtr == ARM::R7) { + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd2, + GPROdd2 + (sizeof(GPROdd2)/sizeof(unsigned))); + else + return std::make_pair(GPROdd5, + GPROdd5 + (sizeof(GPROdd5)/sizeof(unsigned))); + } else { // FramePtr == ARM::R11 + if (!STI.isR9Reserved()) + return std::make_pair(GPROdd3, + GPROdd3 + (sizeof(GPROdd3)/sizeof(unsigned))); + else + return std::make_pair(GPROdd6, + GPROdd6 + (sizeof(GPROdd6)/sizeof(unsigned))); + } + } + return std::make_pair(RC->allocation_order_begin(MF), + RC->allocation_order_end(MF)); +} + +/// ResolveRegAllocHint - Resolves the specified register allocation hint +/// to a physical register. Returns the physical register if it is successful. +unsigned +ARMRegisterInfo::ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const { + if (Reg == 0 || !isPhysicalRegister(Reg)) + return 0; + if (Type == 0) + return Reg; + else if (Type == (unsigned)ARMRI::RegPairOdd) + // Odd register. + return getRegisterPairOdd(Reg, MF); + else if (Type == (unsigned)ARMRI::RegPairEven) + // Even register. + return getRegisterPairEven(Reg, MF); + return 0; +} + bool ARMRegisterInfo::requiresRegisterScavenging(const MachineFunction &MF) const { const ARMFunctionInfo *AFI = MF.getInfo(); @@ -1525,4 +1674,152 @@ int ARMRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { return ARMGenRegisterInfo::getDwarfRegNumFull(RegNum, 0); } +unsigned ARMRegisterInfo::getRegisterPairEven(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: case ARM::R1: + return ARM::R0; + case ARM::R2: case ARM::R3: + // FIXME! + return STI.isThumb() ? 0 : ARM::R2; + case ARM::R4: case ARM::R5: + return ARM::R4; + case ARM::R6: case ARM::R7: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R6; + case ARM::R8: case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R8; + case ARM::R10: case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R10; + + case ARM::S0: case ARM::S1: + return ARM::S0; + case ARM::S2: case ARM::S3: + return ARM::S2; + case ARM::S4: case ARM::S5: + return ARM::S4; + case ARM::S6: case ARM::S7: + return ARM::S6; + case ARM::S8: case ARM::S9: + return ARM::S8; + case ARM::S10: case ARM::S11: + return ARM::S10; + case ARM::S12: case ARM::S13: + return ARM::S12; + case ARM::S14: case ARM::S15: + return ARM::S14; + case ARM::S16: case ARM::S17: + return ARM::S16; + case ARM::S18: case ARM::S19: + return ARM::S18; + case ARM::S20: case ARM::S21: + return ARM::S20; + case ARM::S22: case ARM::S23: + return ARM::S22; + case ARM::S24: case ARM::S25: + return ARM::S24; + case ARM::S26: case ARM::S27: + return ARM::S26; + case ARM::S28: case ARM::S29: + return ARM::S28; + case ARM::S30: case ARM::S31: + return ARM::S30; + + case ARM::D0: case ARM::D1: + return ARM::D0; + case ARM::D2: case ARM::D3: + return ARM::D2; + case ARM::D4: case ARM::D5: + return ARM::D4; + case ARM::D6: case ARM::D7: + return ARM::D6; + case ARM::D8: case ARM::D9: + return ARM::D8; + case ARM::D10: case ARM::D11: + return ARM::D10; + case ARM::D12: case ARM::D13: + return ARM::D12; + case ARM::D14: case ARM::D15: + return ARM::D14; + } + + return 0; +} + +unsigned ARMRegisterInfo::getRegisterPairOdd(unsigned Reg, + const MachineFunction &MF) const { + switch (Reg) { + default: break; + // Return 0 if either register of the pair is a special register. + // So no R12, etc. + case ARM::R0: case ARM::R1: + return ARM::R1; + case ARM::R2: case ARM::R3: + // FIXME! + return STI.isThumb() ? 0 : ARM::R3; + case ARM::R4: case ARM::R5: + return ARM::R5; + case ARM::R6: case ARM::R7: + return isReservedReg(MF, ARM::R7) ? 0 : ARM::R7; + case ARM::R8: case ARM::R9: + return isReservedReg(MF, ARM::R9) ? 0 :ARM::R9; + case ARM::R10: case ARM::R11: + return isReservedReg(MF, ARM::R11) ? 0 : ARM::R11; + + case ARM::S0: case ARM::S1: + return ARM::S1; + case ARM::S2: case ARM::S3: + return ARM::S3; + case ARM::S4: case ARM::S5: + return ARM::S5; + case ARM::S6: case ARM::S7: + return ARM::S7; + case ARM::S8: case ARM::S9: + return ARM::S9; + case ARM::S10: case ARM::S11: + return ARM::S11; + case ARM::S12: case ARM::S13: + return ARM::S13; + case ARM::S14: case ARM::S15: + return ARM::S15; + case ARM::S16: case ARM::S17: + return ARM::S17; + case ARM::S18: case ARM::S19: + return ARM::S19; + case ARM::S20: case ARM::S21: + return ARM::S21; + case ARM::S22: case ARM::S23: + return ARM::S23; + case ARM::S24: case ARM::S25: + return ARM::S25; + case ARM::S26: case ARM::S27: + return ARM::S27; + case ARM::S28: case ARM::S29: + return ARM::S29; + case ARM::S30: case ARM::S31: + return ARM::S31; + + case ARM::D0: case ARM::D1: + return ARM::D1; + case ARM::D2: case ARM::D3: + return ARM::D3; + case ARM::D4: case ARM::D5: + return ARM::D5; + case ARM::D6: case ARM::D7: + return ARM::D7; + case ARM::D8: case ARM::D9: + return ARM::D9; + case ARM::D10: case ARM::D11: + return ARM::D11; + case ARM::D12: case ARM::D13: + return ARM::D13; + case ARM::D14: case ARM::D15: + return ARM::D15; + } + + return 0; +} + #include "ARMGenRegisterInfo.inc" diff --git a/lib/Target/ARM/ARMRegisterInfo.h b/lib/Target/ARM/ARMRegisterInfo.h index e1d9efbcabf..b27e3b7c0ad 100644 --- a/lib/Target/ARM/ARMRegisterInfo.h +++ b/lib/Target/ARM/ARMRegisterInfo.h @@ -22,12 +22,17 @@ namespace llvm { class TargetInstrInfo; class Type; +/// Register allocation hints. +namespace ARMRI { + enum { + RegPairOdd = 1, + RegPairEven = 2 + }; +} + struct ARMRegisterInfo : public ARMGenRegisterInfo { const TargetInstrInfo &TII; const ARMSubtarget &STI; -private: - /// FramePtr - ARM physical register used as frame ptr. - unsigned FramePtr; public: ARMRegisterInfo(const TargetInstrInfo &tii, const ARMSubtarget &STI); @@ -49,10 +54,6 @@ public: /// if the register is a single precision VFP register. static unsigned getRegisterNumbering(unsigned RegEnum, bool &isSPVFP); - /// getPointerRegClass - Return the register class to use to hold pointers. - /// This is used for addressing modes. - const TargetRegisterClass *getPointerRegClass() const; - /// Code Generation virtual methods... const TargetRegisterClass * getPhysicalRegisterRegClass(unsigned Reg, MVT VT = MVT::Other) const; @@ -65,6 +66,16 @@ public: bool isReservedReg(const MachineFunction &MF, unsigned Reg) const; + const TargetRegisterClass *getPointerRegClass() const; + + std::pair + getAllocationOrder(const TargetRegisterClass *RC, + std::pair Hint, + const MachineFunction &MF) const; + + unsigned ResolveRegAllocHint(unsigned Type, unsigned Reg, + const MachineFunction &MF) const; + bool requiresRegisterScavenging(const MachineFunction &MF) const; bool hasFP(const MachineFunction &MF) const; @@ -95,6 +106,15 @@ public: int getDwarfRegNum(unsigned RegNum, bool isEH) const; bool isLowRegister(unsigned Reg) const; + +private: + /// FramePtr - ARM physical register used as frame ptr. + unsigned FramePtr; + + unsigned getRegisterPairEven(unsigned Reg, const MachineFunction &MF) const; + + unsigned getRegisterPairOdd(unsigned Reg, const MachineFunction &MF) const; + }; } // end namespace llvm diff --git a/test/CodeGen/ARM/ldrd.ll b/test/CodeGen/ARM/ldrd.ll new file mode 100644 index 00000000000..09bc5fccea0 --- /dev/null +++ b/test/CodeGen/ARM/ldrd.ll @@ -0,0 +1,14 @@ +; RUN: llvm-as < %s | llc -mtriple=armv6-apple-darwin -arm-pre-alloc-loadstore-opti | grep ldrd +; RUN: llvm-as < %s | llc -mtriple=armv5-apple-darwin -arm-pre-alloc-loadstore-opti | not grep ldrd +; RUN: llvm-as < %s | llc -mtriple=armv6-eabi -arm-pre-alloc-loadstore-opti | not grep ldrd +; rdar://r6949835 + +@b = external global i64* + +define i64 @t(i64 %a) nounwind readonly { +entry: + %0 = load i64** @b, align 4 + %1 = load i64* %0, align 4 + %2 = mul i64 %1, %a + ret i64 %2 +}