mirror of
https://github.com/RPCS3/llvm.git
synced 2026-07-01 21:04:04 -04:00
[AArch64] Stackframe accesses to SVE objects.
Materialize accesses to SVE frame objects from SP or FP, whichever is available and beneficial.

This patch still assumes the objects are pre-allocated. The automatic layout of SVE objects within the stackframe will be added in a separate patch.

Reviewers: greened, cameron.mcinally, efriedma, rengolin, thegameg, rovka
Reviewed By: cameron.mcinally
Differential Revision: https://reviews.llvm.org/D67749
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@374772 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@@ -674,7 +674,7 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
|
||||
int BaseOffset = -AFI->getTaggedBasePointerOffset();
|
||||
unsigned FrameReg;
|
||||
StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
|
||||
MF, BaseOffset, false /*isFixed*/, FrameReg,
|
||||
MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
|
||||
/*PreferFP=*/false,
|
||||
/*ForSimm=*/true);
|
||||
Register SrcReg = FrameReg;
|
||||
|
||||
@@ -1610,12 +1610,13 @@ StackOffset AArch64FrameLowering::resolveFrameIndexReference(
|
||||
const auto &MFI = MF.getFrameInfo();
|
||||
int ObjectOffset = MFI.getObjectOffset(FI);
|
||||
bool isFixed = MFI.isFixedObjectIndex(FI);
|
||||
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, FrameReg,
|
||||
bool isSVE = MFI.getStackID(FI) == TargetStackID::SVEVector;
|
||||
return resolveFrameOffsetReference(MF, ObjectOffset, isFixed, isSVE, FrameReg,
|
||||
PreferFP, ForSimm);
|
||||
}
|
||||
|
||||
StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
|
||||
const MachineFunction &MF, int ObjectOffset, bool isFixed,
|
||||
const MachineFunction &MF, int ObjectOffset, bool isFixed, bool isSVE,
|
||||
unsigned &FrameReg, bool PreferFP, bool ForSimm) const {
|
||||
const auto &MFI = MF.getFrameInfo();
|
||||
const auto *RegInfo = static_cast<const AArch64RegisterInfo *>(
|
||||
@@ -1629,16 +1630,17 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
|
||||
!isFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize());
|
||||
|
||||
const StackOffset &SVEStackSize = getSVEStackSize(MF);
|
||||
if (SVEStackSize)
|
||||
llvm_unreachable("Accessing frame indices in presence of SVE "
|
||||
"not yet supported");
|
||||
|
||||
// Use frame pointer to reference fixed objects. Use it for locals if
|
||||
// there are VLAs or a dynamically realigned SP (and thus the SP isn't
|
||||
// reliable as a base). Make sure useFPForScavengingIndex() does the
|
||||
// right thing for the emergency spill slot.
|
||||
bool UseFP = false;
|
||||
if (AFI->hasStackFrame()) {
|
||||
if (AFI->hasStackFrame() && !isSVE) {
|
||||
// We shouldn't prefer using the FP when there is an SVE area
|
||||
// in between the FP and the non-SVE locals/spills.
|
||||
PreferFP &= !SVEStackSize;
|
||||
|
||||
// Note: Keeping the following as multiple 'if' statements rather than
|
||||
// merging to a single expression for readability.
|
||||
//
|
||||
@@ -1666,8 +1668,10 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
|
||||
bool CanUseBP = RegInfo->hasBasePointer(MF);
|
||||
if (FPOffsetFits && CanUseBP) // Both are ok. Pick the best.
|
||||
UseFP = PreferFP;
|
||||
else if (!CanUseBP) // Can't use BP. Forced to use FP.
|
||||
else if (!CanUseBP) { // Can't use BP. Forced to use FP.
|
||||
assert(!SVEStackSize && "Expected BP to be available");
|
||||
UseFP = true;
|
||||
}
|
||||
// else we can use BP and FP, but the offset from FP won't fit.
|
||||
// That will make us scavenge registers which we can probably avoid by
|
||||
// using BP. If it won't fit for BP either, we'll scavenge anyway.
|
||||
@@ -1697,9 +1701,36 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
|
||||
"In the presence of dynamic stack pointer realignment, "
|
||||
"non-argument/CSR objects cannot be accessed through the frame pointer");
|
||||
|
||||
if (isSVE) {
|
||||
int64_t OffsetToSVEArea =
|
||||
MFI.getStackSize() - AFI->getCalleeSavedStackSize();
|
||||
StackOffset FPOffset = {ObjectOffset, MVT::nxv1i8};
|
||||
StackOffset SPOffset = SVEStackSize +
|
||||
StackOffset(ObjectOffset, MVT::nxv1i8) +
|
||||
StackOffset(OffsetToSVEArea, MVT::i8);
|
||||
// Always use the FP for SVE spills if available and beneficial.
|
||||
if (hasFP(MF) &&
|
||||
(SPOffset.getBytes() ||
|
||||
FPOffset.getScalableBytes() < SPOffset.getScalableBytes() ||
|
||||
RegInfo->needsStackRealignment(MF))) {
|
||||
FrameReg = RegInfo->getFrameRegister(MF);
|
||||
return FPOffset;
|
||||
}
|
||||
|
||||
FrameReg = RegInfo->hasBasePointer(MF) ? RegInfo->getBaseRegister()
|
||||
: (unsigned)AArch64::SP;
|
||||
return SPOffset;
|
||||
}
|
||||
|
||||
StackOffset ScalableOffset = {};
|
||||
if (UseFP && !(isFixed || isCSR))
|
||||
ScalableOffset = -SVEStackSize;
|
||||
if (!UseFP && (isFixed || isCSR))
|
||||
ScalableOffset = SVEStackSize;
|
||||
|
||||
if (UseFP) {
|
||||
FrameReg = RegInfo->getFrameRegister(MF);
|
||||
return StackOffset(FPOffset, MVT::i8);
|
||||
return StackOffset(FPOffset, MVT::i8) + ScalableOffset;
|
||||
}
|
||||
|
||||
// Use the base pointer if we have one.
|
||||
@@ -1716,7 +1747,7 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
|
||||
Offset -= AFI->getLocalStackSize();
|
||||
}
|
||||
|
||||
return StackOffset(Offset, MVT::i8);
|
||||
return StackOffset(Offset, MVT::i8) + ScalableOffset;
|
||||
}
|
||||
|
||||
static unsigned getPrologueDeath(MachineFunction &MF, unsigned Reg) {
|
||||
@@ -2213,24 +2244,20 @@ void AArch64FrameLowering::determineCalleeSaves(MachineFunction &MF,
|
||||
<< ' ' << printReg(Reg, RegInfo);
|
||||
dbgs() << "\n";);
|
||||
|
||||
bool HasSVEStackObjects = [&MFI]() {
|
||||
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I)
|
||||
if (MFI.getStackID(I) == TargetStackID::SVEVector &&
|
||||
MFI.getObjectOffset(I) < 0)
|
||||
return true;
|
||||
// Note: We don't take allocatable stack objects into
|
||||
// account yet, because allocation for those is not yet
|
||||
// implemented.
|
||||
return false;
|
||||
}();
|
||||
|
||||
// If any callee-saved registers are used, the frame cannot be eliminated.
|
||||
bool CanEliminateFrame = (SavedRegs.count() == 0) && !HasSVEStackObjects;
|
||||
unsigned MaxAlign = getStackAlignment();
|
||||
int64_t SVEStackSize =
|
||||
alignTo(determineSVEStackSize(MFI, MaxAlign), MaxAlign);
|
||||
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
|
||||
bool CanEliminateFrame = (SavedRegs.count() == 0) && !SVEStackSize;
|
||||
|
||||
// The CSR spill slots have not been allocated yet, so estimateStackSize
|
||||
// won't include them.
|
||||
unsigned EstimatedStackSizeLimit = estimateRSStackSizeLimit(MF);
|
||||
bool BigStack = (EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
|
||||
|
||||
// Conservatively always assume BigStack when there are SVE spills.
|
||||
bool BigStack = SVEStackSize ||
|
||||
(EstimatedStackSize + CSStackSize) > EstimatedStackSizeLimit;
|
||||
if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF))
|
||||
AFI->setHasStackFrame(true);
|
||||
|
||||
@@ -2286,6 +2313,23 @@ bool AArch64FrameLowering::enableStackSlotScavenging(
|
||||
return AFI->hasCalleeSaveStackFreeSpace();
|
||||
}
|
||||
|
||||
/// Compute how many (unaligned) bytes the fixed SVE stack objects span.
///
/// Walks the frame indices from MFI.getObjectIndexBegin() up to (but not
/// including) index 0 and, for those whose stack ID is
/// TargetStackID::SVEVector, takes the largest negated object offset.
/// Returns 0 when no such object is found.
///
/// \p MaxAlign is accepted but neither read nor written here; callers apply
/// the alignment to the returned size themselves.
int64_t AArch64FrameLowering::determineSVEStackSize(MachineFrameInfo &MFI,
                                                    unsigned &MaxAlign) const {
  int64_t DeepestExtent = 0;

  // Only the fixed stack objects are inspected.
  for (int FI = MFI.getObjectIndexBegin(); FI != 0; ++FI) {
    if (MFI.getStackID(FI) != TargetStackID::SVEVector)
      continue;

    // Offsets are negated so that a larger value means a larger area.
    const int64_t Extent = -MFI.getObjectOffset(FI);
    DeepestExtent = Extent > DeepestExtent ? Extent : DeepestExtent;
  }

  // Note: Allocatable stack objects are not taken into account yet, because
  // allocation for those is not yet implemented.
  return DeepestExtent;
}
|
||||
|
||||
void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
||||
MachineFunction &MF, RegScavenger *RS) const {
|
||||
MachineFrameInfo &MFI = MF.getFrameInfo();
|
||||
@@ -2293,22 +2337,11 @@ void AArch64FrameLowering::processFunctionBeforeFrameFinalized(
|
||||
assert(getStackGrowthDirection() == TargetFrameLowering::StackGrowsDown &&
|
||||
"Upwards growing stack unsupported");
|
||||
|
||||
// Process all fixed stack SVE objects.
|
||||
int64_t Offset = 0;
|
||||
for (int I = MFI.getObjectIndexBegin(); I != 0; ++I) {
|
||||
unsigned StackID = MFI.getStackID(I);
|
||||
if (StackID == TargetStackID::SVEVector) {
|
||||
int64_t FixedOffset = -MFI.getObjectOffset(I);
|
||||
if (FixedOffset > Offset)
|
||||
Offset = FixedOffset;
|
||||
}
|
||||
}
|
||||
|
||||
unsigned MaxAlign = getStackAlignment();
|
||||
uint64_t SVEStackSize = alignTo(Offset, MaxAlign);
|
||||
int64_t SVEStackSize = determineSVEStackSize(MFI, MaxAlign);
|
||||
|
||||
AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
|
||||
AFI->setStackSizeSVE(SVEStackSize);
|
||||
AFI->setStackSizeSVE(alignTo(SVEStackSize, MaxAlign));
|
||||
assert(MaxAlign <= 16 && "Cannot align scalable vectors more than 16 bytes");
|
||||
|
||||
// If this function isn't doing Win64-style C++ EH, we don't need to do
|
||||
|
||||
@@ -45,8 +45,8 @@ public:
|
||||
bool ForSimm) const;
|
||||
StackOffset resolveFrameOffsetReference(const MachineFunction &MF,
|
||||
int ObjectOffset, bool isFixed,
|
||||
unsigned &FrameReg, bool PreferFP,
|
||||
bool ForSimm) const;
|
||||
bool isSVE, unsigned &FrameReg,
|
||||
bool PreferFP, bool ForSimm) const;
|
||||
bool spillCalleeSavedRegisters(MachineBasicBlock &MBB,
|
||||
MachineBasicBlock::iterator MI,
|
||||
const std::vector<CalleeSavedInfo> &CSI,
|
||||
@@ -101,6 +101,7 @@ public:
|
||||
private:
|
||||
bool shouldCombineCSRLocalStackBump(MachineFunction &MF,
|
||||
unsigned StackBumpBytes) const;
|
||||
int64_t determineSVEStackSize(MachineFrameInfo &MF, unsigned &MaxAlign) const;
|
||||
};
|
||||
|
||||
} // End llvm namespace
|
||||
|
||||
@@ -2198,6 +2198,18 @@ bool AArch64InstrInfo::getMemOpInfo(unsigned Opcode, unsigned &Scale,
|
||||
MinOffset = -256;
|
||||
MaxOffset = 255;
|
||||
break;
|
||||
case AArch64::LDR_PXI:
|
||||
case AArch64::STR_PXI:
|
||||
Scale = Width = 2;
|
||||
MinOffset = -256;
|
||||
MaxOffset = 255;
|
||||
break;
|
||||
case AArch64::LDR_ZXI:
|
||||
case AArch64::STR_ZXI:
|
||||
Scale = Width = 16;
|
||||
MinOffset = -256;
|
||||
MaxOffset = 255;
|
||||
break;
|
||||
case AArch64::ST2GOffset:
|
||||
case AArch64::STZ2GOffset:
|
||||
Scale = 16;
|
||||
@@ -3340,6 +3352,18 @@ MachineInstr *AArch64InstrInfo::foldMemoryOperandImpl(
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
static bool isSVEScaledImmInstruction(unsigned Opcode) {
|
||||
switch (Opcode) {
|
||||
case AArch64::LDR_ZXI:
|
||||
case AArch64::STR_ZXI:
|
||||
case AArch64::LDR_PXI:
|
||||
case AArch64::STR_PXI:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
|
||||
StackOffset &SOffset,
|
||||
bool *OutUseUnscaledOp,
|
||||
@@ -3383,9 +3407,13 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
|
||||
llvm_unreachable("unhandled opcode in isAArch64FrameOffsetLegal");
|
||||
|
||||
// Construct the complete offset.
|
||||
bool IsMulVL = isSVEScaledImmInstruction(MI.getOpcode());
|
||||
int64_t Offset =
|
||||
IsMulVL ? (SOffset.getScalableBytes()) : (SOffset.getBytes());
|
||||
|
||||
const MachineOperand &ImmOpnd =
|
||||
MI.getOperand(AArch64InstrInfo::getLoadStoreImmIdx(MI.getOpcode()));
|
||||
int Offset = SOffset.getBytes() + ImmOpnd.getImm() * Scale;
|
||||
Offset += ImmOpnd.getImm() * Scale;
|
||||
|
||||
// If the offset doesn't match the scale, we rewrite the instruction to
|
||||
// use the unscaled instruction instead. Likewise, if we have a negative
|
||||
@@ -3417,9 +3445,14 @@ int llvm::isAArch64FrameOffsetLegal(const MachineInstr &MI,
|
||||
if (OutUnscaledOp && UnscaledOp)
|
||||
*OutUnscaledOp = *UnscaledOp;
|
||||
|
||||
SOffset = StackOffset(Offset, MVT::i8);
|
||||
if (IsMulVL)
|
||||
SOffset = StackOffset(Offset, MVT::nxv1i8) +
|
||||
StackOffset(SOffset.getBytes(), MVT::i8);
|
||||
else
|
||||
SOffset = StackOffset(Offset, MVT::i8) +
|
||||
StackOffset(SOffset.getScalableBytes(), MVT::nxv1i8);
|
||||
return AArch64FrameOffsetCanUpdate |
|
||||
(Offset == 0 ? AArch64FrameOffsetIsLegal : 0);
|
||||
(SOffset ? 0 : AArch64FrameOffsetIsLegal);
|
||||
}
|
||||
|
||||
bool llvm::rewriteAArch64FrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
|
||||
|
||||
@@ -26,9 +26,15 @@
|
||||
define void @test_allocate_sve() nounwind { entry: unreachable }
|
||||
define void @test_allocate_sve_gpr_callee_saves() nounwind { entry: unreachable }
|
||||
define void @test_allocate_sve_gpr_realigned() nounwind { entry: unreachable }
|
||||
define void @test_address_sve() nounwind { entry: unreachable }
|
||||
define void @test_address_sve_fp() nounwind { entry: unreachable }
|
||||
define void @test_stack_arg_sve() nounwind { entry: unreachable }
|
||||
define void @test_address_sve_out_of_range() nounwind { entry: unreachable }
|
||||
|
||||
...
|
||||
# +----------+
|
||||
# |scratchreg| // x29 is used as scratch reg.
|
||||
# +----------+
|
||||
# | %fixed- | // scalable SVE object of n * 18 bytes, aligned to 16 bytes,
|
||||
# | stack.0 | // to be materialized with 2*ADDVL (<=> 2 * n * 16bytes)
|
||||
# +----------+
|
||||
@@ -36,14 +42,16 @@
|
||||
# +----------+ <- SP
|
||||
|
||||
# CHECK-LABEL: name: test_allocate_sve
|
||||
# CHECK: stackSize: 16
|
||||
# CHECK: stackSize: 32
|
||||
|
||||
# CHECK: bb.0.entry:
|
||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
||||
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
|
||||
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
|
||||
# CHECK-NEXT: RET_ReallyLR
|
||||
name: test_allocate_sve
|
||||
fixedStack:
|
||||
@@ -57,6 +65,7 @@ body: |
|
||||
...
|
||||
# +----------+
|
||||
# | x20, x21 | // callee saves
|
||||
# |scratchreg| // x29 is used as scratch reg.
|
||||
# +----------+
|
||||
# | %fixed- | // scalable objects
|
||||
# | stack.0 |
|
||||
@@ -65,17 +74,19 @@ body: |
|
||||
# +----------+ <- SP
|
||||
|
||||
# CHECK-LABEL: name: test_allocate_sve_gpr_callee_saves
|
||||
# CHECK: stackSize: 32
|
||||
# CHECK: stackSize: 48
|
||||
|
||||
# CHECK: bb.0.entry:
|
||||
# CHECK-NEXT: $sp = frame-setup STPXpre killed $x21, killed $x20, $sp, -2
|
||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -32
|
||||
# CHECK-NEXT: frame-setup STPXi killed $x21, killed $x20, $sp, 2
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -2
|
||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
||||
# CHECK-NEXT: $x20 = IMPLICIT_DEF
|
||||
# CHECK-NEXT: $x21 = IMPLICIT_DEF
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 2
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
|
||||
# CHECK-NEXT: $sp, $x21, $x20 = frame-destroy LDPXpost $sp, 2
|
||||
# CHECK-NEXT: $x21, $x20 = frame-destroy LDPXi $sp, 2
|
||||
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 32
|
||||
# CHECK-NEXT: RET_ReallyLR
|
||||
name: test_allocate_sve_gpr_callee_saves
|
||||
fixedStack:
|
||||
@@ -119,3 +130,201 @@ body: |
|
||||
bb.0.entry:
|
||||
RET_ReallyLR
|
||||
---
|
||||
...
|
||||
# +----------+
|
||||
# | x20, x21 | // callee saves
|
||||
# +----------+
|
||||
# | %fstack.0 | // scalable @ SP + 16b + 32 scalable bytes
|
||||
# | %fstack.1 | // scalable @ SP + 16b + 16 scalable bytes
|
||||
# | %fstack.2 | // scalable @ SP + 16b + 14 scalable bytes
|
||||
# +----------+
|
||||
# | %stack.0 | // not scalable
|
||||
# +----------+ <- SP
|
||||
|
||||
# CHECK-LABEL: name: test_address_sve
|
||||
# CHECK: stackSize: 32
|
||||
|
||||
# CHECK: bb.0.entry:
|
||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
|
||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
||||
|
||||
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
|
||||
# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP]], 2
|
||||
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
|
||||
# CHECK-NEXT: STR_ZXI $z1, killed $[[TMP]], 1
|
||||
# CHECK-NEXT: $[[TMP:x[0-9]+]] = ADDXri $sp, 16
|
||||
# CHECK-NEXT: STR_PXI $p0, killed $[[TMP]], 7
|
||||
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
|
||||
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
|
||||
# CHECK-NEXT: RET_ReallyLR
|
||||
name: test_address_sve
|
||||
frameInfo:
|
||||
maxAlignment: 16
|
||||
fixedStack:
|
||||
- { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 }
|
||||
- { id: 1, stack-id: sve-vec, size: 16, alignment: 8, offset: -32 }
|
||||
- { id: 2, stack-id: sve-vec, size: 2, alignment: 2, offset: -34 }
|
||||
stack:
|
||||
- { id: 0, stack-id: default, size: 16, alignment: 8 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0, $z1, $p0
|
||||
|
||||
STR_ZXI $z0, %fixed-stack.0, 0
|
||||
STR_ZXI $z1, %fixed-stack.1, 0
|
||||
STR_PXI $p0, %fixed-stack.2, 0
|
||||
|
||||
RET_ReallyLR
|
||||
---
|
||||
...
|
||||
# +-----------+
|
||||
# | x20, x21 | // callee saves
|
||||
# | lr, fp | // frame record
|
||||
# +-----------+ <- FP
|
||||
# | %fstack.0 | // scalable @ FP - 16 scalable bytes
|
||||
# | %fstack.1 | // scalable @ FP - 32 scalable bytes
|
||||
# | %fstack.2 | // scalable @ FP - 34 scalable bytes
|
||||
# +-----------+
|
||||
# | %stack.0 | // not scalable
|
||||
# +-----------+ <- SP
|
||||
|
||||
# CHECK-LABEL: name: test_address_sve_fp
|
||||
# CHECK: stackSize: 32
|
||||
|
||||
# CHECK: bb.0.entry:
|
||||
# CHECK-NEXT: $sp = frame-setup STPXpre killed $fp, killed $lr, $sp, -2
|
||||
# CHECK-NEXT: $fp = frame-setup ADDXri $sp, 0, 0
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -3
|
||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
||||
|
||||
# CHECK-NEXT: STR_ZXI $z0, $fp, -1
|
||||
# CHECK-NEXT: STR_ZXI $z1, $fp, -2
|
||||
# CHECK-NEXT: STR_PXI $p0, $fp, -17
|
||||
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 3
|
||||
# CHECK: $sp = frame-destroy ADDXri $sp, 16, 0
|
||||
# CHECK-NEXT: $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
|
||||
# CHECK-NEXT: RET_ReallyLR
|
||||
name: test_address_sve_fp
|
||||
frameInfo:
|
||||
maxAlignment: 16
|
||||
isFrameAddressTaken: true
|
||||
fixedStack:
|
||||
- { id: 0, stack-id: sve-vec, size: 16, alignment: 8, offset: -16 }
|
||||
- { id: 1, stack-id: sve-vec, size: 16, alignment: 8, offset: -32 }
|
||||
- { id: 2, stack-id: sve-vec, size: 2, alignment: 2, offset: -34 }
|
||||
stack:
|
||||
- { id: 0, stack-id: default, size: 16, alignment: 8 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0, $z1, $p0
|
||||
|
||||
STR_ZXI $z0, %fixed-stack.0, 0
|
||||
STR_ZXI $z1, %fixed-stack.1, 0
|
||||
STR_PXI $p0, %fixed-stack.2, 0
|
||||
|
||||
RET_ReallyLR
|
||||
---
|
||||
...
|
||||
# +-----------+
|
||||
# | %fstack.0 | // stack arg @ SP + 16 scalable bytes + 32 bytes.
|
||||
# +-----------+
|
||||
# |callee save| // register saved as scratch reg.
|
||||
# +-----------+
|
||||
# | %fstack.1 | // vector of 16 scalable bytes
|
||||
# +---------- +
|
||||
# | %stack.0 | // not scalable, 16 bytes
|
||||
# +-----------+ <- SP
|
||||
# CHECK-LABEL: name: test_stack_arg_sve
|
||||
# CHECK: stackSize: 32
|
||||
|
||||
# CHECK: bb.0.entry:
|
||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
||||
# CHECK-NEXT: $sp = frame-setup SUBXri $sp, 16, 0
|
||||
|
||||
# CHECK: $[[TMP:x[0-9]+]] = ADDVL_XXI $sp, 1
|
||||
# CHECK-NEXT: $x0 = LDRXui killed $[[TMP]], 4
|
||||
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 1
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
|
||||
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
|
||||
# CHECK-NEXT: RET_ReallyLR
|
||||
name: test_stack_arg_sve
|
||||
fixedStack:
|
||||
- { id: 0, stack-id: default, size: 16, alignment: 16, offset: 0 }
|
||||
- { id: 1, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
|
||||
stack:
|
||||
- { id: 0, stack-id: default, size: 16, alignment: 16 }
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $x0
|
||||
|
||||
$x0 = LDRXui %fixed-stack.0, 0
|
||||
RET_ReallyLR
|
||||
---
|
||||
...
|
||||
# Test that the address to access an SVE data vector at an offset that
|
||||
# does not fit its immediate, is correctly materialized.
|
||||
# +----------+
|
||||
# |calleesave| // register saved as scratch reg.
|
||||
# +----------+
|
||||
# | %stack.0 | // one SVE data object @ SP + 256 scalable bytes.
|
||||
# |::::::::::|
|
||||
# |: :|
|
||||
# |:%stack.1:| // Large object
|
||||
# |: :|
|
||||
# |::::::::::|
|
||||
# +----------+ <- SP
|
||||
# CHECK-LABEL: name: test_address_sve_out_of_range
|
||||
# CHECK: stackSize: 16
|
||||
|
||||
# CHECK: bb.0.entry:
|
||||
# CHECK-NEXT: $sp = frame-setup STRXpre killed $[[SCRATCH:[a-z0-9]+]], $sp, -16
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -32
|
||||
# CHECK-NEXT: $sp = frame-setup ADDVL_XXI $sp, -1
|
||||
|
||||
# CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDVL_XXI $sp, 1
|
||||
# CHECK-NEXT: STR_ZXI $z0, killed $[[TMP2]], 255
|
||||
|
||||
# CHECK-NEXT: $[[TMP2:x[0-9]+]] = ADDPL_XXI $sp, 1
|
||||
# CHECK-NEXT: STR_PXI $p0, killed $[[TMP2]], 255
|
||||
|
||||
# CHECK: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 31
|
||||
# CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 9
|
||||
# CHECK-NEXT: $sp, $[[SCRATCH]] = frame-destroy LDRXpost $sp, 16
|
||||
# CHECK-NEXT: RET_ReallyLR
|
||||
name: test_address_sve_out_of_range
|
||||
frameInfo:
|
||||
maxAlignment: 16
|
||||
fixedStack:
|
||||
- { id: 0, stack-id: sve-vec, size: 16, alignment: 16, offset: -16 }
|
||||
- { id: 1, stack-id: sve-vec, size: 3584, alignment: 16, offset: -3600 }
|
||||
- { id: 2, stack-id: sve-vec, size: 512, alignment: 16, offset: -4112 }
|
||||
|
||||
body: |
|
||||
bb.0.entry:
|
||||
liveins: $z0, $p0
|
||||
|
||||
STR_ZXI $z0, %fixed-stack.0, 0
|
||||
STR_PXI $p0, %fixed-stack.1, 0
|
||||
|
||||
RET_ReallyLR
|
||||
---
|
||||
|
||||
Reference in New Issue
Block a user