mirror of
https://github.com/RPCSX/llvm.git
synced 2024-11-23 11:49:50 +00:00
ARM: Compute MaxCallFrame size early
This exposes a method in MachineFrameInfo that calculates MaxCallFrameSize and calls it after instruction selection in the ARM target. This avoids ARMBaseRegisterInfo::canRealignStack()/ARMFrameLowering::hasReservedCallFrame() giving different answers in early/late phases of codegen. The testcase shows a particularly nasty example of the result, where we would fail to properly align an alloca. Differential Revision: https://reviews.llvm.org/D32622 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@302303 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
4a6f9ee16e
commit
97beda0626
@ -520,6 +520,14 @@ public:
|
||||
/// Reports the current value of the HasTailCall flag.
bool hasTailCall() const {
  return HasTailCall;
}
|
||||
/// Raises the HasTailCall flag. This setter only ever sets it to true.
void setHasTailCall() {
  HasTailCall = true;
}
|
||||
|
||||
/// Computes the maximum size of a callframe and the AdjustsStack property.
|
||||
/// This only works for targets defining
|
||||
/// TargetInstrInfo::getCallFrameSetupOpcode(), getCallFrameDestroyOpcode(),
|
||||
/// and getFrameSize().
|
||||
/// This is usually computed by the prologue epilogue inserter but some
|
||||
/// targets may call this to compute it earlier.
|
||||
void computeMaxCallFrameSize(const MachineFunction &MF);
|
||||
|
||||
/// Return the maximum size of a call frame that must be
|
||||
/// allocated for an outgoing function call. This is only available if
|
||||
/// CallFrameSetup/Destroy pseudo instructions are used by the target, and
|
||||
|
@ -1207,9 +1207,6 @@ bool IRTranslator::runOnMachineFunction(MachineFunction &CurMF) {
|
||||
|
||||
finishPendingPhis();
|
||||
|
||||
auto &TLI = *MF->getSubtarget().getTargetLowering();
|
||||
TLI.finalizeLowering(*MF);
|
||||
|
||||
// Merge the argument lowering and constants block with its single
|
||||
// successor, the LLVM-IR entry block. We want the basic block to
|
||||
// be maximal.
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "llvm/IR/Function.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Target/TargetLowering.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
|
||||
#define DEBUG_TYPE "instruction-select"
|
||||
@ -70,8 +71,7 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
|
||||
// An optimization remark emitter. Used to report failures.
|
||||
MachineOptimizationRemarkEmitter MORE(MF, /*MBFI=*/nullptr);
|
||||
|
||||
// FIXME: freezeReservedRegs is now done in IRTranslator, but there are many
|
||||
// other MF/MFI fields we need to initialize.
|
||||
// FIXME: There are many other MF/MFI fields we need to initialize.
|
||||
|
||||
#ifndef NDEBUG
|
||||
// Check that our input is fully legal: we require the function to have the
|
||||
@ -184,6 +184,9 @@ bool InstructionSelect::runOnMachineFunction(MachineFunction &MF) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto &TLI = *MF.getSubtarget().getTargetLowering();
|
||||
TLI.finalizeLowering(MF);
|
||||
|
||||
// FIXME: Should we accurately track changes?
|
||||
return true;
|
||||
}
|
||||
|
@ -19,6 +19,7 @@
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/Target/TargetFrameLowering.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Target/TargetRegisterInfo.h"
|
||||
#include "llvm/Target/TargetSubtargetInfo.h"
|
||||
#include <cassert>
|
||||
@ -175,6 +176,31 @@ unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const {
|
||||
return (unsigned)Offset;
|
||||
}
|
||||
|
||||
void MachineFrameInfo::computeMaxCallFrameSize(const MachineFunction &MF) {
|
||||
const TargetInstrInfo &TII = *MF.getSubtarget().getInstrInfo();
|
||||
unsigned FrameSetupOpcode = TII.getCallFrameSetupOpcode();
|
||||
unsigned FrameDestroyOpcode = TII.getCallFrameDestroyOpcode();
|
||||
assert(FrameSetupOpcode != ~0u && FrameDestroyOpcode != ~0u &&
|
||||
"Can only compute MaxCallFrameSize if Setup/Destroy opcode are known");
|
||||
|
||||
MaxCallFrameSize = 0;
|
||||
for (const MachineBasicBlock &MBB : MF) {
|
||||
for (const MachineInstr &MI : MBB) {
|
||||
unsigned Opcode = MI.getOpcode();
|
||||
if (Opcode == FrameSetupOpcode || Opcode == FrameDestroyOpcode) {
|
||||
unsigned Size = TII.getFrameSize(MI);
|
||||
MaxCallFrameSize = std::max(MaxCallFrameSize, Size);
|
||||
AdjustsStack = true;
|
||||
} else if (MI.isInlineAsm()) {
|
||||
// Some inline asm's need a stack frame, as indicated by operand 1.
|
||||
unsigned ExtraInfo = MI.getOperand(InlineAsm::MIOp_ExtraInfo).getImm();
|
||||
if (ExtraInfo & InlineAsm::Extra_IsAlignStack)
|
||||
AdjustsStack = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{
|
||||
if (Objects.empty()) return;
|
||||
|
||||
|
@ -188,8 +188,9 @@ namespace {
|
||||
return Reg < regsReserved.size() && regsReserved.test(Reg);
|
||||
}
|
||||
|
||||
bool isAllocatable(unsigned Reg) {
|
||||
return Reg < TRI->getNumRegs() && MRI->isAllocatable(Reg);
|
||||
bool isAllocatable(unsigned Reg) const {
|
||||
return Reg < TRI->getNumRegs() && TRI->isInAllocatableClass(Reg) &&
|
||||
!regsReserved.test(Reg);
|
||||
}
|
||||
|
||||
// Analysis information if available
|
||||
@ -526,7 +527,8 @@ void MachineVerifier::markReachable(const MachineBasicBlock *MBB) {
|
||||
|
||||
void MachineVerifier::visitMachineFunctionBefore() {
|
||||
lastIndex = SlotIndex();
|
||||
regsReserved = MRI->getReservedRegs();
|
||||
regsReserved = MRI->reservedRegsFrozen() ? MRI->getReservedRegs()
|
||||
: TRI->getReservedRegs(*MF);
|
||||
|
||||
if (!MF->empty())
|
||||
markReachable(&MF->front());
|
||||
|
@ -277,6 +277,9 @@ void PEI::calculateCallFrameInfo(MachineFunction &Fn) {
|
||||
AdjustsStack = true;
|
||||
}
|
||||
|
||||
assert(!MFI.isMaxCallFrameSizeComputed() ||
|
||||
(MFI.getMaxCallFrameSize() == MaxCallFrameSize &&
|
||||
MFI.adjustsStack() == AdjustsStack));
|
||||
MFI.setAdjustsStack(AdjustsStack);
|
||||
MFI.setMaxCallFrameSize(MaxCallFrameSize);
|
||||
|
||||
|
@ -245,11 +245,18 @@ ARMBaseRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
|
||||
switch (RC->getID()) {
|
||||
default:
|
||||
return 0;
|
||||
case ARM::tGPRRegClassID:
|
||||
return TFI->hasFP(MF) ? 4 : 5;
|
||||
case ARM::tGPRRegClassID: {
|
||||
// hasFP ends up calling getMaxCallFrameSize() which may not be
|
||||
// available when getRegPressureLimit() is called as part of
|
||||
// ScheduleDAGRRList.
|
||||
bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed()
|
||||
? TFI->hasFP(MF) : true;
|
||||
return 5 - HasFP;
|
||||
}
|
||||
case ARM::GPRRegClassID: {
|
||||
unsigned FP = TFI->hasFP(MF) ? 1 : 0;
|
||||
return 10 - FP - (STI.isR9Reserved() ? 1 : 0);
|
||||
bool HasFP = MF.getFrameInfo().isMaxCallFrameSizeComputed()
|
||||
? TFI->hasFP(MF) : true;
|
||||
return 10 - HasFP - (STI.isR9Reserved() ? 1 : 0);
|
||||
}
|
||||
case ARM::SPRRegClassID: // Currently not used as 'rep' register class.
|
||||
case ARM::DPRRegClassID:
|
||||
|
@ -14054,3 +14054,8 @@ void ARMTargetLowering::insertCopiesSplitCSR(
|
||||
.addReg(NewVR);
|
||||
}
|
||||
}
|
||||
|
||||
/// ARM override of the finalizeLowering hook: compute the function's maximum
/// call frame size on its frame info first, then run the generic
/// TargetLoweringBase finalization.
void ARMTargetLowering::finalizeLowering(MachineFunction &MF) const {
  auto &FrameInfo = MF.getFrameInfo();
  FrameInfo.computeMaxCallFrameSize(MF);

  TargetLoweringBase::finalizeLowering(MF);
}
|
||||
|
@ -544,6 +544,8 @@ class InstrItineraryData;
|
||||
unsigned getNumInterleavedAccesses(VectorType *VecTy,
|
||||
const DataLayout &DL) const;
|
||||
|
||||
void finalizeLowering(MachineFunction &MF) const override;
|
||||
|
||||
protected:
|
||||
std::pair<const TargetRegisterClass *, uint8_t>
|
||||
findRepresentativeClass(const TargetRegisterInfo *TRI,
|
||||
|
24
test/CodeGen/ARM/alloca-align.ll
Normal file
24
test/CodeGen/ARM/alloca-align.ll
Normal file
@ -0,0 +1,24 @@
|
||||
; RUN: llc -o - %s | FileCheck %s
|
||||
target triple="arm--"
|
||||
|
||||
@glob = external global i32*
|
||||
|
||||
declare void @bar(i32*, [20000 x i8]* byval)
|
||||
|
||||
; CHECK-LABEL: foo:
|
||||
; We should see the stack getting additional alignment
|
||||
; CHECK: sub sp, sp, #16
|
||||
; CHECK: bic sp, sp, #31
|
||||
; And a base pointer getting used.
|
||||
; CHECK: mov r6, sp
|
||||
; Which is passed to the call
|
||||
; CHECK: add [[REG:r[0-9]+]], r6, #19456
|
||||
; CHECK: add r0, [[REG]], #536
|
||||
; CHECK: bl bar
|
||||
; Test body: @foo makes a 32-byte-aligned stack allocation of [4 x i32] and
; calls @bar with a pointer to its first element plus %addr passed byval as a
; [20000 x i8].
define void @foo([20000 x i8]* %addr) {
|
||||
; Over-aligned local (align 32).
%tmp = alloca [4 x i32], align 32
|
||||
; Pointer to element 0 of the aligned array.
%tmp0 = getelementptr [4 x i32], [4 x i32]* %tmp, i32 0, i32 0
|
||||
; The second argument is passed byval.
call void @bar(i32* %tmp0, [20000 x i8]* byval %addr)
|
||||
ret void
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user