mirror of
https://github.com/RPCS3/llvm-mirror.git
synced 2024-12-05 18:27:59 +00:00
[AArch64] Implement aarch64_vector_pcs codegen support.
This patch adds codegen support for saving and restoring V8-V23 in functions specified with the aarch64_vector_pcs calling convention attribute, as added in patch D51477. Reviewers: t.p.northover, gberry, thegameg, rengolin, javed.absar, MatzeB Reviewed By: thegameg Differential Revision: https://reviews.llvm.org/D51479 llvm-svn: 342049
This commit is contained in:
parent
c366257a21
commit
1e20d76347
@ -288,6 +288,12 @@ def CSR_AArch64_AAPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
D8, D9, D10, D11,
|
||||
D12, D13, D14, D15)>;
|
||||
|
||||
// AArch64 PCS for vector functions (VPCS)
|
||||
// must (additionally) preserve full Q8-Q23 registers
|
||||
def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add LR, FP, X19, X20, X21, X22,
|
||||
X23, X24, X25, X26, X27, X28,
|
||||
(sequence "Q%u", 8, 23))>;
|
||||
|
||||
// Constructors and destructors return 'this' in the iOS 64-bit C++ ABI; since
|
||||
// 'this' and the pointer return value are both passed in X0 in these cases,
|
||||
// this can be partially modelled by treating X0 as a callee-saved register;
|
||||
@ -362,5 +368,7 @@ def CSR_AArch64_AAPCS_SwiftError_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS_SwiftError, X18)>;
|
||||
def CSR_AArch64_RT_MostRegs_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs, X18)>;
|
||||
def CSR_AArch64_AAVPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAVPCS, X18)>;
|
||||
def CSR_AArch64_AAPCS_SCS
|
||||
: CalleeSavedRegs<(add CSR_AArch64_AAPCS, X18)>;
|
||||
|
@ -461,12 +461,19 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
|
||||
NewOpc = AArch64::STPDpre;
|
||||
Scale = 8;
|
||||
break;
|
||||
case AArch64::STPQi:
|
||||
NewOpc = AArch64::STPQpre;
|
||||
Scale = 16;
|
||||
break;
|
||||
case AArch64::STRXui:
|
||||
NewOpc = AArch64::STRXpre;
|
||||
break;
|
||||
case AArch64::STRDui:
|
||||
NewOpc = AArch64::STRDpre;
|
||||
break;
|
||||
case AArch64::STRQui:
|
||||
NewOpc = AArch64::STRQpre;
|
||||
break;
|
||||
case AArch64::LDPXi:
|
||||
NewOpc = AArch64::LDPXpost;
|
||||
Scale = 8;
|
||||
@ -475,12 +482,19 @@ static MachineBasicBlock::iterator convertCalleeSaveRestoreToSPPrePostIncDec(
|
||||
NewOpc = AArch64::LDPDpost;
|
||||
Scale = 8;
|
||||
break;
|
||||
case AArch64::LDPQi:
|
||||
NewOpc = AArch64::LDPQpost;
|
||||
Scale = 16;
|
||||
break;
|
||||
case AArch64::LDRXui:
|
||||
NewOpc = AArch64::LDRXpost;
|
||||
break;
|
||||
case AArch64::LDRDui:
|
||||
NewOpc = AArch64::LDRDpost;
|
||||
break;
|
||||
case AArch64::LDRQui:
|
||||
NewOpc = AArch64::LDRQpost;
|
||||
break;
|
||||
}
|
||||
|
||||
MachineInstrBuilder MIB = BuildMI(MBB, MBBI, DL, TII->get(NewOpc));
|
||||
@ -531,6 +545,12 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
|
||||
case AArch64::LDRDui:
|
||||
Scale = 8;
|
||||
break;
|
||||
case AArch64::STPQi:
|
||||
case AArch64::STRQui:
|
||||
case AArch64::LDPQi:
|
||||
case AArch64::LDRQui:
|
||||
Scale = 16;
|
||||
break;
|
||||
default:
|
||||
llvm_unreachable("Unexpected callee-save save/restore opcode!");
|
||||
}
|
||||
@ -541,7 +561,7 @@ static void fixupCalleeSaveRestoreStackOffset(MachineInstr &MI,
|
||||
// Last operand is immediate offset that needs fixing.
|
||||
MachineOperand &OffsetOpnd = MI.getOperand(OffsetIdx);
|
||||
// All generated opcodes have scaled offsets.
|
||||
assert(LocalStackSize % 8 == 0);
|
||||
assert(LocalStackSize % Scale == 0);
|
||||
OffsetOpnd.setImm(OffsetOpnd.getImm() + LocalStackSize / Scale);
|
||||
}
|
||||
|
||||
@ -1208,7 +1228,7 @@ struct RegPairInfo {
|
||||
unsigned Reg2 = AArch64::NoRegister;
|
||||
int FrameIdx;
|
||||
int Offset;
|
||||
enum RegType { GPR, FPR64 } Type;
|
||||
enum RegType { GPR, FPR64, FPR128 } Type;
|
||||
|
||||
RegPairInfo() = default;
|
||||
|
||||
@ -1246,6 +1266,8 @@ static void computeCalleeSaveRegisterPairs(
|
||||
RPI.Type = RegPairInfo::GPR;
|
||||
else if (AArch64::FPR64RegClass.contains(RPI.Reg1))
|
||||
RPI.Type = RegPairInfo::FPR64;
|
||||
else if (AArch64::FPR128RegClass.contains(RPI.Reg1))
|
||||
RPI.Type = RegPairInfo::FPR128;
|
||||
else
|
||||
llvm_unreachable("Unsupported register class.");
|
||||
|
||||
@ -1261,6 +1283,10 @@ static void computeCalleeSaveRegisterPairs(
|
||||
if (AArch64::FPR64RegClass.contains(NextReg))
|
||||
RPI.Reg2 = NextReg;
|
||||
break;
|
||||
case RegPairInfo::FPR128:
|
||||
if (AArch64::FPR128RegClass.contains(NextReg))
|
||||
RPI.Reg2 = NextReg;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -1294,17 +1320,21 @@ static void computeCalleeSaveRegisterPairs(
|
||||
|
||||
RPI.FrameIdx = CSI[i].getFrameIdx();
|
||||
|
||||
if (Count * 8 != AFI->getCalleeSavedStackSize() && !RPI.isPaired()) {
|
||||
// Round up size of non-pair to pair size if we need to pad the
|
||||
// callee-save area to ensure 16-byte alignment.
|
||||
Offset -= 16;
|
||||
int Scale = RPI.Type == RegPairInfo::FPR128 ? 16 : 8;
|
||||
Offset -= RPI.isPaired() ? 2 * Scale : Scale;
|
||||
|
||||
// Round up size of non-pair to pair size if we need to pad the
|
||||
// callee-save area to ensure 16-byte alignment.
|
||||
if (AFI->hasCalleeSaveStackFreeSpace() &&
|
||||
RPI.Type != RegPairInfo::FPR128 && !RPI.isPaired()) {
|
||||
Offset -= 8;
|
||||
assert(Offset % 16 == 0);
|
||||
assert(MFI.getObjectAlignment(RPI.FrameIdx) <= 16);
|
||||
MFI.setObjectAlignment(RPI.FrameIdx, 16);
|
||||
AFI->setCalleeSaveStackHasFreeSpace(true);
|
||||
} else
|
||||
Offset -= RPI.isPaired() ? 16 : 8;
|
||||
assert(Offset % 8 == 0);
|
||||
RPI.Offset = Offset / 8;
|
||||
}
|
||||
|
||||
assert(Offset % Scale == 0);
|
||||
RPI.Offset = Offset / Scale;
|
||||
assert((RPI.Offset >= -64 && RPI.Offset <= 63) &&
|
||||
"Offset out of bounds for LDP/STP immediate");
|
||||
|
||||
@ -1370,6 +1400,11 @@ bool AArch64FrameLowering::spillCalleeSavedRegisters(
|
||||
Size = 8;
|
||||
Align = 8;
|
||||
break;
|
||||
case RegPairInfo::FPR128:
|
||||
StrOpc = RPI.isPaired() ? AArch64::STPQi : AArch64::STRQui;
|
||||
Size = 16;
|
||||
Align = 16;
|
||||
break;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "CSR spill: (" << printReg(Reg1, TRI);
|
||||
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
|
||||
@ -1441,6 +1476,11 @@ bool AArch64FrameLowering::restoreCalleeSavedRegisters(
|
||||
Size = 8;
|
||||
Align = 8;
|
||||
break;
|
||||
case RegPairInfo::FPR128:
|
||||
LdrOpc = RPI.isPaired() ? AArch64::LDPQi : AArch64::LDRQui;
|
||||
Size = 16;
|
||||
Align = 16;
|
||||
break;
|
||||
}
|
||||
LLVM_DEBUG(dbgs() << "CSR restore: (" << printReg(Reg1, TRI);
|
||||
if (RPI.isPaired()) dbgs() << ", " << printReg(Reg2, TRI);
|
||||
@ -1507,24 +1547,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
? RegInfo->getBaseRegister()
|
||||
: (unsigned)AArch64::NoRegister;
|
||||
|
||||
unsigned SpillEstimate = SavedRegs.count();
|
||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||
unsigned Reg = CSRegs[i];
|
||||
unsigned PairedReg = CSRegs[i ^ 1];
|
||||
if (Reg == BasePointerReg)
|
||||
SpillEstimate++;
|
||||
if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg))
|
||||
SpillEstimate++;
|
||||
}
|
||||
SpillEstimate += 2; // Conservatively include FP+LR in the estimate
|
||||
unsigned StackEstimate = MFI.estimateStackSize(MF) + 8 * SpillEstimate;
|
||||
|
||||
// The frame record needs to be created by saving the appropriate registers
|
||||
if (hasFP(MF) || windowsRequiresStackProbe(MF, StackEstimate)) {
|
||||
SavedRegs.set(AArch64::FP);
|
||||
SavedRegs.set(AArch64::LR);
|
||||
}
|
||||
|
||||
unsigned ExtraCSSpill = 0;
|
||||
// Figure out which callee-saved registers to save/restore.
|
||||
for (unsigned i = 0; CSRegs[i]; ++i) {
|
||||
@ -1548,7 +1570,8 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
// MachO's compact unwind format relies on all registers being stored in
|
||||
// pairs.
|
||||
// FIXME: the usual format is actually better if unwinding isn't needed.
|
||||
if (produceCompactUnwindFrame(MF) && !SavedRegs.test(PairedReg)) {
|
||||
if (produceCompactUnwindFrame(MF) && PairedReg != AArch64::NoRegister &&
|
||||
!SavedRegs.test(PairedReg)) {
|
||||
SavedRegs.set(PairedReg);
|
||||
if (AArch64::GPR64RegClass.contains(PairedReg) &&
|
||||
!RegInfo->isReservedReg(MF, PairedReg))
|
||||
@ -1556,6 +1579,24 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
}
|
||||
}
|
||||
|
||||
// Calculates the callee saved stack size.
|
||||
unsigned CSStackSize = 0;
|
||||
const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
|
||||
const MachineRegisterInfo &MRI = MF.getRegInfo();
|
||||
for (unsigned Reg : SavedRegs.set_bits())
|
||||
CSStackSize += TRI->getRegSizeInBits(Reg, MRI) / 8;
|
||||
|
||||
// Save number of saved regs, so we can easily update CSStackSize later.
|
||||
unsigned NumSavedRegs = SavedRegs.count();
|
||||
|
||||
// The frame record needs to be created by saving the appropriate registers
|
||||
unsigned EstimatedStackSize = MFI.estimateStackSize(MF);
|
||||
if (hasFP(MF) ||
|
||||
windowsRequiresStackProbe(MF, EstimatedStackSize + CSStackSize + 16)) {
|
||||
SavedRegs.set(AArch64::FP);
|
||||
SavedRegs.set(AArch64::LR);
|
||||
}
|
||||
|
||||
LLVM_DEBUG(dbgs() << "*** determineCalleeSaves\nUsed CSRs:";
|
||||
for (unsigned Reg
|
||||
: SavedRegs.set_bits()) dbgs()
|
||||
@ -1563,15 +1604,12 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
dbgs() << "\n";);
|
||||
|
||||
// If any callee-saved registers are used, the frame cannot be eliminated.
|
||||
unsigned NumRegsSpilled = SavedRegs.count();
|
||||
bool CanEliminateFrame = NumRegsSpilled == 0;
|
||||
bool CanEliminateFrame = SavedRegs.count() == 0;
|
||||
|
||||
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
||||
// won't include them.
|
||||
unsigned CFSize = MFI.estimateStackSize(MF) + 8 * NumRegsSpilled;
|
||||
LLVM_DEBUG(dbgs() << "Estimated stack frame size: " << CFSize << " bytes.\n");
|
||||
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
|
||||
bool BigStack = (CFSize > EstimatedStackSizeLimit);
|
||||
bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
|
||||
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
|
||||
AFI->setHasStackFrame(true);
|
||||
|
||||
@ -1592,7 +1630,6 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
if (produceCompactUnwindFrame(MF))
|
||||
SavedRegs.set(UnspilledCSGPRPaired);
|
||||
ExtraCSSpill = UnspilledCSGPRPaired;
|
||||
NumRegsSpilled = SavedRegs.count();
|
||||
}
|
||||
|
||||
// If we didn't find an extra callee-saved register to spill, create
|
||||
@ -1609,9 +1646,17 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
}
|
||||
}
|
||||
|
||||
// Adding the size of additional 64bit GPR saves.
|
||||
CSStackSize += 8 * (SavedRegs.count() - NumSavedRegs);
|
||||
unsigned AlignedCSStackSize = alignTo(CSStackSize, 16);
|
||||
LLVM_DEBUG(dbgs() << "Estimated stack frame size: "
|
||||
<< EstimatedStackSize + AlignedCSStackSize
|
||||
<< " bytes.\n");
|
||||
|
||||
// Round up to register pair alignment to avoid additional SP adjustment
|
||||
// instructions.
|
||||
AFI->setCalleeSavedStackSize(alignTo(8 * NumRegsSpilled, 16));
|
||||
AFI->setCalleeSavedStackSize(AlignedCSStackSize);
|
||||
AFI->setCalleeSaveStackHasFreeSpace(AlignedCSStackSize != CSStackSize);
|
||||
}
|
||||
|
||||
bool AArch64FrameLowering::enableStackSlotScavenging(
|
||||
|
@ -50,8 +50,7 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
|
||||
if (MF->getFunction().getCallingConv() == CallingConv::AnyReg)
|
||||
return CSR_AArch64_AllRegs_SaveList;
|
||||
if (MF->getFunction().getCallingConv() == CallingConv::AArch64_VectorCall)
|
||||
// FIXME: default to AAPCS until we add full support.
|
||||
return CSR_AArch64_AAPCS_SaveList;
|
||||
return CSR_AArch64_AAVPCS_SaveList;
|
||||
if (MF->getFunction().getCallingConv() == CallingConv::CXX_FAST_TLS)
|
||||
return MF->getInfo<AArch64FunctionInfo>()->isSplitCSR() ?
|
||||
CSR_AArch64_CXX_TLS_Darwin_PE_SaveList :
|
||||
@ -102,8 +101,7 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF,
|
||||
return SCS ? CSR_AArch64_CXX_TLS_Darwin_SCS_RegMask
|
||||
: CSR_AArch64_CXX_TLS_Darwin_RegMask;
|
||||
if (CC == CallingConv::AArch64_VectorCall)
|
||||
// FIXME: default to AAPCS until we add full support.
|
||||
return SCS ? CSR_AArch64_AAPCS_SCS_RegMask : CSR_AArch64_AAPCS_RegMask;
|
||||
return SCS ? CSR_AArch64_AAVPCS_SCS_RegMask : CSR_AArch64_AAVPCS_RegMask;
|
||||
if (MF.getSubtarget<AArch64Subtarget>().getTargetLowering()
|
||||
->supportSwiftError() &&
|
||||
MF.getFunction().getAttributes().hasAttrSomewhere(Attribute::SwiftError))
|
||||
|
253
test/CodeGen/AArch64/aarch64-vector-pcs.mir
Normal file
253
test/CodeGen/AArch64/aarch64-vector-pcs.mir
Normal file
@ -0,0 +1,253 @@
|
||||
# RUN: llc -mtriple=aarch64-linux-gnu -run-pass=prologepilog %s -o - | FileCheck %s
|
||||
|
||||
# The tests below test the allocation of 128bit callee-saves
|
||||
# on the stack, specifically their offsets.
|
||||
|
||||
# Padding of GPR64-registers is needed to ensure 16 byte alignment of
|
||||
# the stack pointer after the GPR64/FPR64 block (which is also needed
|
||||
# for the FPR128 saves when present).
|
||||
|
||||
# This file also tests whether an emergency stack slot is allocated
|
||||
# when the stack frame is over a given size, caused by a series of
|
||||
# FPR128 saves. The alignment can leave a gap that can be reused by
|
||||
# stack slot scavenging, so it is important that the stack size
|
||||
# is properly estimated.
|
||||
|
||||
|
||||
--- |
|
||||
|
||||
; ModuleID = '<stdin>'
|
||||
source_filename = "<stdin>"
|
||||
target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
|
||||
target triple = "aarch64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define aarch64_vector_pcs void @test_q10_q11_x19() nounwind { entry: unreachable }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define aarch64_vector_pcs void @test_q10_q11_x19_x20() nounwind { entry: unreachable }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define aarch64_vector_pcs void @test_q10_q11_x19_x20_x21() nounwind { entry: unreachable }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define aarch64_vector_pcs void @test_q8_to_q23_x19_to_x30() nounwind { entry: unreachable }
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define aarch64_vector_pcs void @test_q8_to_q23_x19_to_x30_preinc() nounwind { entry: unreachable }
|
||||
|
||||
...
|
||||
---
|
||||
name: test_q10_q11_x19
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
$x19 = IMPLICIT_DEF
|
||||
$q10 = IMPLICIT_DEF
|
||||
$q11 = IMPLICIT_DEF
|
||||
|
||||
; Check that the alignment gap for the 8-byte x19 is padded
|
||||
; with another 8 bytes. The CSR region will look like this:
|
||||
; +-------------------+
|
||||
; |/////padding///////| (8 bytes)
|
||||
; | X19 | (8 bytes)
|
||||
; +-------------------+ <- SP -16
|
||||
; | Q10, Q11 | (32 bytes)
|
||||
; +-------------------+ <- SP -48
|
||||
|
||||
; CHECK-LABEL: test_q10_q11_x19{{[[:space:]]}}
|
||||
; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]])
|
||||
; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -48, size: 16, alignment: 16
|
||||
; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -32, size: 16, alignment: 16
|
||||
; CHECK-DAG: frame-setup STRXui killed $x19, $sp, 4 :: (store 8 into %stack.[[X19:[0-9]+]])
|
||||
; CHECK-DAG: - { id: [[X19]], {{.*}}, offset: -16, size: 8, alignment: 16
|
||||
|
||||
...
|
||||
---
|
||||
name: test_q10_q11_x19_x20
|
||||
alignment: 2
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
$x19 = IMPLICIT_DEF
|
||||
$x20 = IMPLICIT_DEF
|
||||
$q10 = IMPLICIT_DEF
|
||||
$q11 = IMPLICIT_DEF
|
||||
|
||||
; +-------------------+
|
||||
; | X19, X20 | (16 bytes)
|
||||
; +-------------------+ <- SP -16
|
||||
; | Q10, Q11 | (32 bytes)
|
||||
; +-------------------+ <- SP -48
|
||||
|
||||
; CHECK-LABEL: test_q10_q11_x19_x20{{[[:space:]]}}
|
||||
; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -3 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]])
|
||||
; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 4 :: (store 8 into %stack.[[X20:[0-9]+]]), (store 8 into %stack.[[X19:[0-9]+]])
|
||||
; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -48, size: 16, alignment: 16
|
||||
; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -32, size: 16, alignment: 16
|
||||
; CHECK-DAG: - { id: [[X20]], {{.*}}, offset: -16, size: 8, alignment: 8
|
||||
; CHECK-DAG: - { id: [[X19]], {{.*}}, offset: -8, size: 8, alignment: 8
|
||||
|
||||
...
|
||||
---
|
||||
name: test_q10_q11_x19_x20_x21
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
$x19 = IMPLICIT_DEF
|
||||
$x20 = IMPLICIT_DEF
|
||||
$x21 = IMPLICIT_DEF
|
||||
$q10 = IMPLICIT_DEF
|
||||
$q11 = IMPLICIT_DEF
|
||||
|
||||
; Check that the alignment gap is padded with another 8 bytes.
|
||||
; The CSR region will look like this:
|
||||
; +-------------------+
|
||||
; | X19, X20 | (16 bytes)
|
||||
; +-------------------+ <- SP -16
|
||||
; |/////padding///////| (8 bytes)
|
||||
; | X21 | (8 bytes)
|
||||
; +-------------------+ <- SP -32
|
||||
; | Q10, Q11 | (32 bytes)
|
||||
; +-------------------+ <- SP -64
|
||||
|
||||
; CHECK-LABEL: test_q10_q11_x19_x20_x21
|
||||
; CHECK-DAG: $sp = frame-setup STPQpre killed $q11, killed $q10, $sp, -4 :: (store 16 into %stack.[[Q11:[0-9]+]]), (store 16 into %stack.[[Q10:[0-9]+]])
|
||||
; CHECK-DAG: frame-setup STRXui killed $x21, $sp, 4 :: (store 8 into %stack.[[X21:[0-9]+]])
|
||||
; CHECK-DAG: frame-setup STPXi killed $x20, killed $x19, $sp, 6
|
||||
; CHECK-DAG: - { id: [[Q11]], {{.*}}, offset: -64, size: 16, alignment: 16
|
||||
; CHECK-DAG: - { id: [[Q10]], {{.*}}, offset: -48, size: 16, alignment: 16
|
||||
; CHECK-DAG: - { id: [[X21]], {{.*}}, offset: -32, size: 8, alignment: 16
|
||||
|
||||
...
|
||||
---
|
||||
name: test_q8_to_q23_x19_to_x30
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0.entry:
|
||||
$x19 = IMPLICIT_DEF
|
||||
$x20 = IMPLICIT_DEF
|
||||
$x21 = IMPLICIT_DEF
|
||||
$x22 = IMPLICIT_DEF
|
||||
$x23 = IMPLICIT_DEF
|
||||
$x24 = IMPLICIT_DEF
|
||||
$x25 = IMPLICIT_DEF
|
||||
$x26 = IMPLICIT_DEF
|
||||
$x27 = IMPLICIT_DEF
|
||||
$x28 = IMPLICIT_DEF
|
||||
$fp = IMPLICIT_DEF
|
||||
$lr = IMPLICIT_DEF
|
||||
$q8 = IMPLICIT_DEF
|
||||
$q9 = IMPLICIT_DEF
|
||||
$q10 = IMPLICIT_DEF
|
||||
$q11 = IMPLICIT_DEF
|
||||
$q12 = IMPLICIT_DEF
|
||||
$q13 = IMPLICIT_DEF
|
||||
$q14 = IMPLICIT_DEF
|
||||
$q15 = IMPLICIT_DEF
|
||||
$q16 = IMPLICIT_DEF
|
||||
$q17 = IMPLICIT_DEF
|
||||
$q18 = IMPLICIT_DEF
|
||||
$q19 = IMPLICIT_DEF
|
||||
$q20 = IMPLICIT_DEF
|
||||
$q21 = IMPLICIT_DEF
|
||||
$q22 = IMPLICIT_DEF
|
||||
$q23 = IMPLICIT_DEF
|
||||
|
||||
; Test with more callee saves, which triggers 'BigStack' in
|
||||
; AArch64FrameLowering which in turn causes an emergency spill
|
||||
; slot to be allocated. The emergency spill slot is allocated
|
||||
; as close as possible to SP, so at SP + 0.
|
||||
; +-------------------+
|
||||
; | X19..X30 | (96 bytes)
|
||||
; +-------------------+ <- SP -96
|
||||
; | Q8..Q23 | (256 bytes)
|
||||
; +-------------------+ <- SP -352
|
||||
; | emergency slot | (16 bytes)
|
||||
; +-------------------+ <- SP -368
|
||||
|
||||
; CHECK-LABEL: test_q8_to_q23_x19_to_x30
|
||||
; CHECK: $sp = frame-setup SUBXri $sp, 368, 0
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q23, killed $q22, $sp, 1 :: (store 16 into %stack.{{[0-9]+}}), (store 16 into %stack.{{[0-9]+}})
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q21, killed $q20, $sp, 3
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q19, killed $q18, $sp, 5
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q17, killed $q16, $sp, 7
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q15, killed $q14, $sp, 9
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 11
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 13
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 15
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 34 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}})
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 36
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 38
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 40
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 42
|
||||
; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 44
|
||||
|
||||
...
|
||||
---
|
||||
name: test_q8_to_q23_x19_to_x30_preinc
|
||||
tracksRegLiveness: true
|
||||
stack:
|
||||
- { id: 0, size: 160, alignment: 4, local-offset: 0 }
|
||||
constants:
|
||||
body: |
|
||||
bb.0.entry:
|
||||
$x19 = IMPLICIT_DEF
|
||||
$x20 = IMPLICIT_DEF
|
||||
$x21 = IMPLICIT_DEF
|
||||
$x22 = IMPLICIT_DEF
|
||||
$x23 = IMPLICIT_DEF
|
||||
$x24 = IMPLICIT_DEF
|
||||
$x25 = IMPLICIT_DEF
|
||||
$x26 = IMPLICIT_DEF
|
||||
$x27 = IMPLICIT_DEF
|
||||
$x28 = IMPLICIT_DEF
|
||||
$fp = IMPLICIT_DEF
|
||||
$lr = IMPLICIT_DEF
|
||||
$q8 = IMPLICIT_DEF
|
||||
$q9 = IMPLICIT_DEF
|
||||
$q10 = IMPLICIT_DEF
|
||||
$q11 = IMPLICIT_DEF
|
||||
$q12 = IMPLICIT_DEF
|
||||
$q13 = IMPLICIT_DEF
|
||||
$q14 = IMPLICIT_DEF
|
||||
$q15 = IMPLICIT_DEF
|
||||
$q16 = IMPLICIT_DEF
|
||||
$q17 = IMPLICIT_DEF
|
||||
$q18 = IMPLICIT_DEF
|
||||
$q19 = IMPLICIT_DEF
|
||||
$q20 = IMPLICIT_DEF
|
||||
$q21 = IMPLICIT_DEF
|
||||
$q22 = IMPLICIT_DEF
|
||||
$q23 = IMPLICIT_DEF
|
||||
|
||||
; When the total stack size >= 512, it will use the pre-increment
|
||||
; rather than the 'sub sp, sp, <size>'.
|
||||
; +-------------------+
|
||||
; | X19..X30 | (96 bytes)
|
||||
; +-------------------+ <- SP -96
|
||||
; | Q8..Q23 | (256 bytes)
|
||||
; +-------------------+ <- SP -352
|
||||
; | 'obj' | (160 bytes)
|
||||
; +-------------------+ <- SP -512
|
||||
; | emergency slot | (16 bytes)
|
||||
; +-------------------+ <- SP -528
|
||||
|
||||
; CHECK-LABEL: test_q8_to_q23_x19_to_x30_preinc
|
||||
; CHECK: $sp = frame-setup STPQpre killed $q23, killed $q22, $sp, -22 :: (store 16 into %stack.{{[0-9]+}}), (store 16 into %stack.{{[0-9]+}})
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q21, killed $q20, $sp, 2
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q19, killed $q18, $sp, 4
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q17, killed $q16, $sp, 6
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q15, killed $q14, $sp, 8
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q13, killed $q12, $sp, 10
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q11, killed $q10, $sp, 12
|
||||
; CHECK-NEXT: frame-setup STPQi killed $q9, killed $q8, $sp, 14
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x28, killed $x27, $sp, 32 :: (store 8 into %stack.{{[0-9]+}}), (store 8 into %stack.{{[0-9]+}})
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x26, killed $x25, $sp, 34
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x24, killed $x23, $sp, 36
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x22, killed $x21, $sp, 38
|
||||
; CHECK-NEXT: frame-setup STPXi killed $x20, killed $x19, $sp, 40
|
||||
; CHECK-NEXT: frame-setup STPXi killed $fp, killed $lr, $sp, 42
|
||||
; CHECK-NEXT: $sp = frame-setup SUBXri $sp, 176, 0
|
||||
|
||||
...
|
Loading…
Reference in New Issue
Block a user