mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-27 13:40:30 +00:00
[PEI, AArch64] Use empty spaces in stack area for local stack slot allocation.
Summary: If the target requests it, use empty spaces in the fixed and callee-save stack area to allocate local stack objects. AArch64: Change the last callee-save reg stack object's alignment instead of its size to leave a gap to take advantage of the above change. Reviewers: t.p.northover, qcolombet, MatzeB Subscribers: rengolin, mcrosier, llvm-commits, aemerson Differential Revision: http://reviews.llvm.org/D20220 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271527 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
ec6796196a
commit
c22d8b1ed9
@ -151,6 +151,13 @@ public:
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Returns true if the stack slot holes in the fixed and callee-save stack
|
||||
/// area should be used when allocating other stack locations to reduce stack
|
||||
/// size.
|
||||
virtual bool enableStackSlotScavenging(const MachineFunction &MF) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
|
||||
/// the function.
|
||||
virtual void emitPrologue(MachineFunction &MF,
|
||||
|
@ -577,6 +577,108 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute which bytes of fixed and callee-save stack area are unused and keep
|
||||
/// track of them in StackBytesFree.
|
||||
///
|
||||
static inline void
|
||||
computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
|
||||
unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
|
||||
int64_t FixedCSEnd, BitVector &StackBytesFree) {
|
||||
// Avoid undefined int64_t -> int conversion below in extreme case.
|
||||
if (FixedCSEnd > std::numeric_limits<int>::max())
|
||||
return;
|
||||
|
||||
StackBytesFree.resize(FixedCSEnd, true);
|
||||
|
||||
SmallVector<int, 16> AllocatedFrameSlots;
|
||||
// Add fixed objects.
|
||||
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
|
||||
AllocatedFrameSlots.push_back(i);
|
||||
// Add callee-save objects.
|
||||
for (int i = MinCSFrameIndex; i <= (int)MaxCSFrameIndex; ++i)
|
||||
AllocatedFrameSlots.push_back(i);
|
||||
|
||||
for (int i : AllocatedFrameSlots) {
|
||||
// These are converted from int64_t, but they should always fit in int
|
||||
// because of the FixedCSEnd check above.
|
||||
int ObjOffset = MFI->getObjectOffset(i);
|
||||
int ObjSize = MFI->getObjectSize(i);
|
||||
int ObjStart, ObjEnd;
|
||||
if (StackGrowsDown) {
|
||||
// ObjOffset is negative when StackGrowsDown is true.
|
||||
ObjStart = -ObjOffset - ObjSize;
|
||||
ObjEnd = -ObjOffset;
|
||||
} else {
|
||||
ObjStart = ObjOffset;
|
||||
ObjEnd = ObjOffset + ObjSize;
|
||||
}
|
||||
// Ignore fixed holes that are in the previous stack frame.
|
||||
if (ObjEnd > 0)
|
||||
StackBytesFree.reset(ObjStart, ObjEnd);
|
||||
}
|
||||
}
|
||||
|
||||
/// Assign frame object to an unused portion of the stack in the fixed stack
|
||||
/// object range. Return true if the allocation was successful.
|
||||
///
|
||||
static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
|
||||
bool StackGrowsDown, unsigned MaxAlign,
|
||||
BitVector &StackBytesFree) {
|
||||
if (MFI->isVariableSizedObjectIndex(FrameIdx))
|
||||
return false;
|
||||
|
||||
if (StackBytesFree.none()) {
|
||||
// clear it to speed up later scavengeStackSlot calls to
|
||||
// StackBytesFree.none()
|
||||
StackBytesFree.clear();
|
||||
return false;
|
||||
}
|
||||
|
||||
unsigned ObjAlign = MFI->getObjectAlignment(FrameIdx);
|
||||
if (ObjAlign > MaxAlign)
|
||||
return false;
|
||||
|
||||
int64_t ObjSize = MFI->getObjectSize(FrameIdx);
|
||||
int FreeStart;
|
||||
for (FreeStart = StackBytesFree.find_first(); FreeStart != -1;
|
||||
FreeStart = StackBytesFree.find_next(FreeStart)) {
|
||||
|
||||
// Check that free space has suitable alignment.
|
||||
unsigned ObjStart = StackGrowsDown ? FreeStart + ObjSize : FreeStart;
|
||||
if (alignTo(ObjStart, ObjAlign) != ObjStart)
|
||||
continue;
|
||||
|
||||
if (FreeStart + ObjSize > StackBytesFree.size())
|
||||
return false;
|
||||
|
||||
bool AllBytesFree = true;
|
||||
for (unsigned Byte = 0; Byte < ObjSize; ++Byte)
|
||||
if (!StackBytesFree.test(FreeStart + Byte)) {
|
||||
AllBytesFree = false;
|
||||
break;
|
||||
}
|
||||
if (AllBytesFree)
|
||||
break;
|
||||
}
|
||||
|
||||
if (FreeStart == -1)
|
||||
return false;
|
||||
|
||||
if (StackGrowsDown) {
|
||||
int ObjStart = -(FreeStart + ObjSize);
|
||||
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << ObjStart
|
||||
<< "]\n");
|
||||
MFI->setObjectOffset(FrameIdx, ObjStart);
|
||||
} else {
|
||||
DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << FreeStart
|
||||
<< "]\n");
|
||||
MFI->setObjectOffset(FrameIdx, FreeStart);
|
||||
}
|
||||
|
||||
StackBytesFree.reset(FreeStart, FreeStart + ObjSize);
|
||||
return true;
|
||||
}
|
||||
|
||||
/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
|
||||
/// those required to be close to the Stack Protector) to stack offsets.
|
||||
static void
|
||||
@ -621,9 +723,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
|
||||
|
||||
// If there are fixed sized objects that are preallocated in the local area,
|
||||
// non-fixed objects can't be allocated right at the start of local area.
|
||||
// We currently don't support filling in holes in between fixed sized
|
||||
// objects, so we adjust 'Offset' to point to the end of last fixed sized
|
||||
// preallocated object.
|
||||
// Adjust 'Offset' to point to the end of last fixed sized preallocated
|
||||
// object.
|
||||
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
|
||||
int64_t FixedOff;
|
||||
if (StackGrowsDown) {
|
||||
@ -667,6 +768,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
|
||||
}
|
||||
}
|
||||
|
||||
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
|
||||
// stack area.
|
||||
int64_t FixedCSEnd = Offset;
|
||||
unsigned MaxAlign = MFI->getMaxAlignment();
|
||||
|
||||
// Make sure the special register scavenging spill slot is closest to the
|
||||
@ -798,10 +902,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
|
||||
if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
|
||||
Fn.getTarget().Options.StackSymbolOrdering)
|
||||
TFI.orderFrameObjects(Fn, ObjectsToAllocate);
|
||||
|
||||
|
||||
// Keep track of which bytes in the fixed and callee-save range are used so we
|
||||
// can use the holes when allocating later stack objects. Only do this if
|
||||
// stack protector isn't being used and the target requests it and we're
|
||||
// optimizing.
|
||||
BitVector StackBytesFree;
|
||||
if (!ObjectsToAllocate.empty() &&
|
||||
Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
|
||||
MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
|
||||
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
|
||||
FixedCSEnd, StackBytesFree);
|
||||
|
||||
// Now walk the objects and actually assign base offsets to them.
|
||||
for (auto &Object : ObjectsToAllocate)
|
||||
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
|
||||
if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
|
||||
StackBytesFree))
|
||||
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
|
||||
|
||||
// Make sure the special register scavenging spill slot is closest to the
|
||||
// stack pointer.
|
||||
|
@ -942,7 +942,8 @@ static void computeCalleeSaveRegisterPairs(
|
||||
// callee-save area to ensure 16-byte alignment.
|
||||
Offset -= 16;
|
||||
assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
|
||||
MFI->setObjectSize(RPI.FrameIdx, 16);
|
||||
MFI->setObjectAlignment(RPI.FrameIdx, 16);
|
||||
AFI->setCalleeSaveStackHasFreeSpace(true);
|
||||
} else
|
||||
Offset -= RPI.isPaired() ? 16 : 8;
|
||||
assert(Offset % 8 == 0);
|
||||
@ -1190,3 +1191,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
// instructions.
|
||||
AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
|
||||
}
|
||||
|
||||
bool AArch64FrameLowering::enableStackSlotScavenging(
|
||||
const MachineFunction &MF) const {
|
||||
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||
return AFI->hasCalleeSaveStackFreeSpace();
|
||||
}
|
||||
|
@ -67,6 +67,8 @@ public:
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Target override of the stack slot scavenging hook; only declared here.
bool enableStackSlotScavenging(const MachineFunction &MF) const override;
|
||||
|
||||
private:
|
||||
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
|
||||
unsigned StackBumpBytes) const;
|
||||
|
@ -83,18 +83,24 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
|
||||
/// frame is unknown at compile time. e.g., in case of VLAs.
|
||||
bool StackRealigned;
|
||||
|
||||
/// True when the callee-save stack area has unused gaps that may be used for
|
||||
/// other stack allocations.
|
||||
bool CalleeSaveStackHasFreeSpace;
|
||||
|
||||
public:
|
||||
AArch64FunctionInfo()
|
||||
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
|
||||
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
|
||||
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
|
||||
IsSplitCSR(false), StackRealigned(false) {}
|
||||
IsSplitCSR(false), StackRealigned(false),
|
||||
CalleeSaveStackHasFreeSpace(false) {}
|
||||
|
||||
explicit AArch64FunctionInfo(MachineFunction &MF)
|
||||
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
|
||||
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
|
||||
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
|
||||
IsSplitCSR(false), StackRealigned(false) {
|
||||
IsSplitCSR(false), StackRealigned(false),
|
||||
CalleeSaveStackHasFreeSpace(false) {
|
||||
(void)MF;
|
||||
}
|
||||
|
||||
@ -112,6 +118,13 @@ public:
|
||||
/// Returns the current value of the StackRealigned flag.
bool isStackRealigned() const { return StackRealigned; }
|
||||
/// Sets the StackRealigned flag to \p s.
void setStackRealigned(bool s) { StackRealigned = s; }
|
||||
|
||||
bool hasCalleeSaveStackFreeSpace() const {
|
||||
return CalleeSaveStackHasFreeSpace;
|
||||
}
|
||||
/// Records in the CalleeSaveStackHasFreeSpace flag whether the callee-save
/// stack area contains unused gaps; \p s is the new value.
void setCalleeSaveStackHasFreeSpace(bool s) {
  CalleeSaveStackHasFreeSpace = s;
}
|
||||
|
||||
/// Returns the current value of the IsSplitCSR flag.
bool isSplitCSR() const { return IsSplitCSR; }
|
||||
/// Sets the IsSplitCSR flag to \p s.
void setIsSplitCSR(bool s) { IsSplitCSR = s; }
|
||||
|
||||
|
@ -674,7 +674,7 @@ bb1:
|
||||
|
||||
define void @realign_conditional2(i1 %b) {
|
||||
entry:
|
||||
%tmp = alloca i8, i32 4
|
||||
%tmp = alloca i8, i32 16
|
||||
br i1 %b, label %bb0, label %bb1
|
||||
|
||||
bb0:
|
||||
|
@ -14,14 +14,12 @@
|
||||
; CHECK-NEXT: ret
|
||||
|
||||
; CHECK-LINUX-LABEL: main:
|
||||
; CHECK-LINUX: sub sp, sp, #32
|
||||
; CHECK-LINUX-NEXT: str x30, [sp, #16]
|
||||
; CHECK-LINUX: str x30, [sp, #-16]!
|
||||
; CHECK-LINUX-NEXT: str wzr, [sp, #12]
|
||||
; CHECK-LINUX: adrp x0, .L.str
|
||||
; CHECK-LINUX: add x0, x0, :lo12:.L.str
|
||||
; CHECK-LINUX-NEXT: bl puts
|
||||
; CHECK-LINUX-NEXT: ldr x30, [sp, #16]
|
||||
; CHECK-LINUX-NEXT: add sp, sp, #32
|
||||
; CHECK-LINUX-NEXT: ldr x30, [sp], #16
|
||||
; CHECK-LINUX-NEXT: ret
|
||||
|
||||
@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"
|
||||
|
Loading…
Reference in New Issue
Block a user