[PEI, AArch64] Use empty spaces in stack area for local stack slot allocation.

Summary:
If the target requests it, use empty spaces in the fixed and
callee-save stack area to allocate local stack objects.

AArch64: Change last callee-save reg stack object alignment instead of
size to leave a gap to take advantage of above change.

Reviewers: t.p.northover, qcolombet, MatzeB

Subscribers: rengolin, mcrosier, llvm-commits, aemerson

Differential Revision: http://reviews.llvm.org/D20220

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@271527 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Geoff Berry 2016-06-02 16:22:07 +00:00
parent ec6796196a
commit c22d8b1ed9
7 changed files with 157 additions and 13 deletions

View File

@ -151,6 +151,13 @@ public:
return false;
}
/// Returns true if the stack slot holes in the fixed and callee-save stack
/// area should be used when allocating other stack locations to reduce stack
/// size.
///
/// This default implementation ignores \p MF and always returns false, so the
/// behaviour is opt-in: a target enables it by overriding this virtual hook.
virtual bool enableStackSlotScavenging(const MachineFunction &MF) const {
return false;
}
/// emitProlog/emitEpilog - These methods insert prolog and epilog code into
/// the function.
virtual void emitPrologue(MachineFunction &MF,

View File

@ -577,6 +577,108 @@ AdjustStackOffset(MachineFrameInfo *MFI, int FrameIdx,
}
}
/// Compute which bytes of fixed and callee-save stack area are unused and keep
/// track of them in StackBytesFree.
///
/// StackBytesFree is resized to FixedCSEnd bits, all initially set (free);
/// the bit range covered by every fixed and callee-save object is then
/// cleared. Bit positions are distances from the start of the frame, so for a
/// downward-growing stack the (negative) object offsets are negated first.
///
/// \param MFI              Frame info queried for object offsets and sizes.
/// \param StackGrowsDown   True if object offsets are negative.
/// \param MinCSFrameIndex  First callee-save frame index (inclusive).
/// \param MaxCSFrameIndex  Last callee-save frame index (inclusive).
/// \param FixedCSEnd       Offset of the end of the fixed/callee-save area.
/// \param StackBytesFree   Output bitmap; left untouched if FixedCSEnd does
///                         not fit in an int.
static inline void
computeFreeStackSlots(MachineFrameInfo *MFI, bool StackGrowsDown,
                      unsigned MinCSFrameIndex, unsigned MaxCSFrameIndex,
                      int64_t FixedCSEnd, BitVector &StackBytesFree) {
  // Avoid undefined int64_t -> int conversion below in extreme case.
  if (FixedCSEnd > std::numeric_limits<int>::max())
    return;

  StackBytesFree.resize(FixedCSEnd, true);

  SmallVector<int, 16> AllocatedFrameSlots;
  // Add fixed objects: indices from getObjectIndexBegin() up to, but not
  // including, 0.
  for (int i = MFI->getObjectIndexBegin(); i != 0; ++i)
    AllocatedFrameSlots.push_back(i);
  // Add callee-save objects, but only if the range is non-empty. The indices
  // are unsigned; an empty range expressed as Min > Max (presumably a
  // "no callee saves" sentinel such as UINT_MAX/0 -- verify against the
  // caller) would otherwise convert to a negative int and make the loop walk
  // unrelated frame objects.
  if (MinCSFrameIndex <= MaxCSFrameIndex) {
    for (int i = MinCSFrameIndex; i <= static_cast<int>(MaxCSFrameIndex); ++i)
      AllocatedFrameSlots.push_back(i);
  }

  for (int i : AllocatedFrameSlots) {
    // These are converted from int64_t, but they should always fit in int
    // because of the FixedCSEnd check above.
    int ObjOffset = MFI->getObjectOffset(i);
    int ObjSize = MFI->getObjectSize(i);
    int ObjStart, ObjEnd;
    if (StackGrowsDown) {
      // ObjOffset is negative when StackGrowsDown is true.
      ObjStart = -ObjOffset - ObjSize;
      ObjEnd = -ObjOffset;
    } else {
      ObjStart = ObjOffset;
      ObjEnd = ObjOffset + ObjSize;
    }
    // Ignore fixed holes that are in the previous stack frame.
    if (ObjEnd > 0)
      StackBytesFree.reset(ObjStart, ObjEnd);
  }
}
/// Try to place frame object \p FrameIdx into a hole of the fixed and
/// callee-save stack area described by \p StackBytesFree (a set bit marks a
/// free byte).
///
/// The object is rejected if it is variable sized, if no free byte remains,
/// or if its alignment exceeds \p MaxAlign. Otherwise the free bytes are
/// scanned in increasing order for the first suitably aligned run that is
/// large enough. On success the object's offset is recorded in \p MFI and the
/// consumed bytes are cleared from \p StackBytesFree.
///
/// \returns true if the object was assigned an offset, false otherwise.
static inline bool scavengeStackSlot(MachineFrameInfo *MFI, int FrameIdx,
                                     bool StackGrowsDown, unsigned MaxAlign,
                                     BitVector &StackBytesFree) {
  if (MFI->isVariableSizedObjectIndex(FrameIdx))
    return false;

  if (StackBytesFree.none()) {
    // Drop the storage so that subsequent none() queries made by later calls
    // are cheap.
    StackBytesFree.clear();
    return false;
  }

  const unsigned Alignment = MFI->getObjectAlignment(FrameIdx);
  if (Alignment > MaxAlign)
    return false;

  const int64_t Size = MFI->getObjectSize(FrameIdx);

  int Candidate = StackBytesFree.find_first();
  while (Candidate != -1) {
    // The edge that must be aligned is the far end of the object when the
    // stack grows down, and the near end otherwise.
    const unsigned AlignedEdge = StackGrowsDown ? Candidate + Size : Candidate;
    if (alignTo(AlignedEdge, Alignment) == AlignedEdge) {
      // Candidates only increase, so once one overruns the tracked area no
      // later one can fit either.
      if (Candidate + Size > StackBytesFree.size())
        return false;

      // Count how many consecutive bytes starting at Candidate are free.
      unsigned Byte = 0;
      while (Byte < Size && StackBytesFree.test(Candidate + Byte))
        ++Byte;
      if (!(Byte < Size))
        break; // The whole object fits here.
    }
    Candidate = StackBytesFree.find_next(Candidate);
  }

  if (Candidate == -1)
    return false;

  // Translate the bitmap position back into a frame offset; offsets are
  // negative when the stack grows down.
  int NewOffset = Candidate;
  if (StackGrowsDown)
    NewOffset = -(Candidate + Size);

  DEBUG(dbgs() << "alloc FI(" << FrameIdx << ") scavenged at SP[" << NewOffset
               << "]\n");
  MFI->setObjectOffset(FrameIdx, NewOffset);

  StackBytesFree.reset(Candidate, Candidate + Size);
  return true;
}
/// AssignProtectedObjSet - Helper function to assign large stack objects (i.e.,
/// those required to be close to the Stack Protector) to stack offsets.
static void
@ -621,9 +723,8 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
// If there are fixed sized objects that are preallocated in the local area,
// non-fixed objects can't be allocated right at the start of local area.
// We currently don't support filling in holes in between fixed sized
// objects, so we adjust 'Offset' to point to the end of last fixed sized
// preallocated object.
// Adjust 'Offset' to point to the end of last fixed sized preallocated
// object.
for (int i = MFI->getObjectIndexBegin(); i != 0; ++i) {
int64_t FixedOff;
if (StackGrowsDown) {
@ -667,6 +768,9 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
}
}
// FixedCSEnd is the stack offset to the end of the fixed and callee-save
// stack area.
int64_t FixedCSEnd = Offset;
unsigned MaxAlign = MFI->getMaxAlignment();
// Make sure the special register scavenging spill slot is closest to the
@ -798,10 +902,23 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) {
if (Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
Fn.getTarget().Options.StackSymbolOrdering)
TFI.orderFrameObjects(Fn, ObjectsToAllocate);
// Keep track of which bytes in the fixed and callee-save range are used so we
// can use the holes when allocating later stack objects. Only do this if
// stack protector isn't being used and the target requests it and we're
// optimizing.
BitVector StackBytesFree;
if (!ObjectsToAllocate.empty() &&
Fn.getTarget().getOptLevel() != CodeGenOpt::None &&
MFI->getStackProtectorIndex() < 0 && TFI.enableStackSlotScavenging(Fn))
computeFreeStackSlots(MFI, StackGrowsDown, MinCSFrameIndex, MaxCSFrameIndex,
FixedCSEnd, StackBytesFree);
// Now walk the objects and actually assign base offsets to them.
for (auto &Object : ObjectsToAllocate)
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
if (!scavengeStackSlot(MFI, Object, StackGrowsDown, MaxAlign,
StackBytesFree))
AdjustStackOffset(MFI, Object, StackGrowsDown, Offset, MaxAlign, Skew);
// Make sure the special register scavenging spill slot is closest to the
// stack pointer.

View File

@ -942,7 +942,8 @@ static void computeCalleeSaveRegisterPairs(
// callee-save area to ensure 16-byte alignment.
Offset -= 16;
assert(MFI->getObjectAlignment(RPI.FrameIdx) <= 16);
MFI->setObjectSize(RPI.FrameIdx, 16);
MFI->setObjectAlignment(RPI.FrameIdx, 16);
AFI->setCalleeSaveStackHasFreeSpace(true);
} else
Offset -= RPI.isPaired() ? 16 : 8;
assert(Offset % 8 == 0);
@ -1190,3 +1191,9 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
// instructions.
AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
}
bool AArch64FrameLowering::enableStackSlotScavenging(
const MachineFunction &MF) const {
const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
return AFI->hasCalleeSaveStackFreeSpace();
}

View File

@ -67,6 +67,8 @@ public:
return true;
}
bool enableStackSlotScavenging(const MachineFunction &MF) const override;
private:
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
unsigned StackBumpBytes) const;

View File

@ -83,18 +83,24 @@ class AArch64FunctionInfo : public MachineFunctionInfo {
/// frame is unknown at compile time. e.g., in case of VLAs.
bool StackRealigned;
/// True when the callee-save stack area has unused gaps that may be used for
/// other stack allocations.
bool CalleeSaveStackHasFreeSpace;
public:
AArch64FunctionInfo()
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false), StackRealigned(false) {}
IsSplitCSR(false), StackRealigned(false),
CalleeSaveStackHasFreeSpace(false) {}
explicit AArch64FunctionInfo(MachineFunction &MF)
: BytesInStackArgArea(0), ArgumentStackToRestore(0), HasStackFrame(false),
NumLocalDynamicTLSAccesses(0), VarArgsStackIndex(0), VarArgsGPRIndex(0),
VarArgsGPRSize(0), VarArgsFPRIndex(0), VarArgsFPRSize(0),
IsSplitCSR(false), StackRealigned(false) {
IsSplitCSR(false), StackRealigned(false),
CalleeSaveStackHasFreeSpace(false) {
(void)MF;
}
@ -112,6 +118,13 @@ public:
bool isStackRealigned() const { return StackRealigned; }
void setStackRealigned(bool s) { StackRealigned = s; }
/// Returns the CalleeSaveStackHasFreeSpace flag, i.e. whether the callee-save
/// stack area has unused gaps that may be used for other stack allocations.
bool hasCalleeSaveStackFreeSpace() const {
return CalleeSaveStackHasFreeSpace;
}
/// Sets the CalleeSaveStackHasFreeSpace flag to \p s, recording whether the
/// callee-save stack area has unused gaps available for other allocations.
void setCalleeSaveStackHasFreeSpace(bool s) {
CalleeSaveStackHasFreeSpace = s;
}
bool isSplitCSR() const { return IsSplitCSR; }
void setIsSplitCSR(bool s) { IsSplitCSR = s; }

View File

@ -674,7 +674,7 @@ bb1:
define void @realign_conditional2(i1 %b) {
entry:
%tmp = alloca i8, i32 4
%tmp = alloca i8, i32 16
br i1 %b, label %bb0, label %bb1
bb0:

View File

@ -14,14 +14,12 @@
; CHECK-NEXT: ret
; CHECK-LINUX-LABEL: main:
; CHECK-LINUX: sub sp, sp, #32
; CHECK-LINUX-NEXT: str x30, [sp, #16]
; CHECK-LINUX: str x30, [sp, #-16]!
; CHECK-LINUX-NEXT: str wzr, [sp, #12]
; CHECK-LINUX: adrp x0, .L.str
; CHECK-LINUX: add x0, x0, :lo12:.L.str
; CHECK-LINUX-NEXT: bl puts
; CHECK-LINUX-NEXT: ldr x30, [sp, #16]
; CHECK-LINUX-NEXT: add sp, sp, #32
; CHECK-LINUX-NEXT: ldr x30, [sp], #16
; CHECK-LINUX-NEXT: ret
@.str = private unnamed_addr constant [7 x i8] c"hello\0A\00"