[AArch64][GlobalISel] Support -tailcallopt
This adds support for `-tailcallopt` tail calls to CallLowering. This piggy-backs off the changes from D67577, since doing it without a bit of refactoring gets extremely ugly.

Support is basically ported from AArch64ISelLowering. The main difference here is that tail calls in `-tailcallopt` change the ABI, so there's some extra bookkeeping for the stack.

Show that we are correctly lowering these by updating tail-call.ll.

Also show that we don't do anything strange in general by updating fastcc-reserved.ll, which passes `-tailcallopt`, but doesn't emit any tail calls.

Differential Revision: https://reviews.llvm.org/D67580

llvm-svn: 372177
parent 02bf2ff5f8
commit a00d53313e
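For illustration, here is a minimal IR sketch in the style of the updated tail-call.ll (the declaration of @callee_stack8 appears in that test; the caller name is made up for this summary). Under `-tailcallopt`, a fastcc-to-fastcc tail call like the one below should now be lowered by GlobalISel as a guaranteed tail call, even though the callee takes a stack-passed argument:

declare fastcc void @callee_stack8([8 x i64], i64)

; The [8 x i64] filler uses up all integer argument registers, so the i64 is
; passed on the stack.
define fastcc void @caller_to_stack8() {
  tail call fastcc void @callee_stack8([8 x i64] undef, i64 42)
  ret void
}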
AArch64CallLowering.cpp

@@ -130,9 +130,11 @@ struct CallReturnHandler : public IncomingArgHandler {
 struct OutgoingArgHandler : public CallLowering::ValueHandler {
   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
-                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
+                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
+                     int FPDiff = 0)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
-        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
+        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
+        StackSize(0) {}

   Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
@@ -141,8 +143,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
     LLT s64 = LLT::scalar(64);

     if (IsTailCall) {
-      // TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
-      // ISelLowering.
+      Offset += FPDiff;
       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
       Register FIReg = MRI.createGenericVirtualRegister(p0);
       MIRBuilder.buildFrameIndex(FIReg, FI);
@@ -201,10 +202,18 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
   MachineInstrBuilder MIB;
   CCAssignFn *AssignFnVarArg;
   bool IsTailCall;
+
+  /// For tail calls, the byte offset of the call's argument area from the
+  /// callee's. Unused elsewhere.
+  int FPDiff;
   uint64_t StackSize;
 };
 } // namespace

+static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+  return CallConv == CallingConv::Fast && TailCallOpt;
+}
+
 void AArch64CallLowering::splitToValueTypes(
     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
@@ -408,9 +417,21 @@ bool AArch64CallLowering::lowerFormalArguments(
     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
   }

-  // TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
-  // We need to keep track of the size of function stacks for tail call
-  // optimization. When we tail call, we need to check if the callee's arguments
+  if (doesCalleeRestoreStack(F.getCallingConv(),
+                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
+    // We have a non-standard ABI, so why not make full use of the stack that
+    // we're going to pop? It must be aligned to 16 B in any case.
+    StackOffset = alignTo(StackOffset, 16);
+
+    // If we're expected to restore the stack (e.g. fastcc), then we'll be
+    // adding a multiple of 16.
+    FuncInfo->setArgumentStackToRestore(StackOffset);
+
+    // Our own callers will guarantee that the space is free by giving an
+    // aligned value to CALLSEQ_START.
+  }
+
+  // When we tail call, we need to check if the callee's arguments
   // will fit on the caller's stack. So, whenever we lower formal arguments,
   // we should keep track of this information, since we might lower a tail call
   // in this function later.
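The hunk above makes a fastcc function built with -tailcallopt record a 16-byte-aligned ArgumentStackToRestore, since under that ABI the callee pops its own stack-passed arguments on return. A hedged IR sketch of such a callee (the names are illustrative, not taken from the patch's tests):

; One i64 lands on the stack, so the 8-byte argument area rounds up to a
; 16-byte ArgumentStackToRestore when compiled with -tailcallopt.
define fastcc void @callee_with_stack_arg([8 x i64] %filler, i64 %on_stack) {
  ret void
}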
@@ -639,9 +660,12 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
     }
   }

-  // If we have -tailcallopt and matching CCs, at this point, we could return
-  // true. However, we don't have full tail call support yet. So, continue
-  // checking. We want to emit a sibling call.
+  // If we have -tailcallopt, then we're done.
+  if (MF.getTarget().Options.GuaranteedTailCallOpt)
+    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
+
+  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
+  // Try to find cases where we can do that.

   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
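With -tailcallopt, eligibility now reduces to the check above: the callee's convention must support guaranteed TCO and must match the caller's. As a hedged illustration (not a test from this patch), the tail call below is not guaranteed, because the caller uses the default C convention while the callee is fastcc, so lowering falls through to the ordinary sibcall checks:

declare fastcc void @fast_callee()

; CalleeCC (fastcc) != CallerCC (ccc), so no guaranteed tail call here.
define void @c_caller() {
  tail call fastcc void @fast_callee()
  ret void
}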
@@ -695,6 +719,9 @@ bool AArch64CallLowering::lowerTailCall(
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();

+  // True when we're tail calling, but without -tailcallopt.
+  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
+
   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
   // register class. Until we can do that, we should fall back here.
   if (F.hasFnAttribute("branch-target-enforcement")) {
@@ -704,18 +731,22 @@ bool AArch64CallLowering::lowerTailCall(
   }

   // Find out which ABI gets to decide where things go.
+  CallingConv::ID CalleeCC = Info.CallConv;
   CCAssignFn *AssignFnFixed =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
+      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
   CCAssignFn *AssignFnVarArg =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
+      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);

+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
+
   unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);

-  // Add the byte offset for the tail call. We only have sibling calls, so this
-  // is always 0.
-  // TODO: Handle tail calls where we will have a different value here.
+  // Byte offset for the tail call. When we are sibcalling, this will always
+  // be 0.
   MIB.addImm(0);

   // Tell the call which registers are clobbered.
@@ -728,13 +759,64 @@ bool AArch64CallLowering::lowerTailCall(
   if (TRI->isAnyArgRegReserved(MF))
     TRI->emitReservedArgRegCallError(MF);

+  // FPDiff is the byte offset of the call's argument area from the callee's.
+  // Stores to callee stack arguments will be placed in FixedStackSlots offset
+  // by this amount for a tail call. In a sibling call it must be 0 because the
+  // caller will deallocate the entire stack and the callee still expects its
+  // arguments to begin at SP+0.
+  int FPDiff = 0;
+
+  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
+  // by -tailcallopt. For sibcalls, the memory operands for the call are
+  // already available in the caller's incoming argument space.
+  unsigned NumBytes = 0;
+  if (!IsSibCall) {
+    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
+    // before handling assignments, because FPDiff must be known for memory
+    // arguments.
+    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+    SmallVector<CCValAssign, 16> OutLocs;
+    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
+    analyzeArgInfo(OutInfo, OutArgs,
+                   *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
+
+    // The callee will pop the argument stack as a tail call. Thus, we must
+    // keep it 16-byte aligned.
+    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
+
+    // FPDiff will be negative if this tail call requires more space than we
+    // would automatically have in our incoming argument space. Positive if we
+    // actually shrink the stack.
+    FPDiff = NumReusableBytes - NumBytes;
+
+    // The stack pointer must be 16-byte aligned at all times it's used for a
+    // memory operation, which in practice means at *all* times and in
+    // particular across call boundaries. Therefore our own arguments started at
+    // a 16-byte aligned SP and the delta applied for the tail call should
+    // satisfy the same constraint.
+    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+  }
+
   // Do the actual argument marshalling.
   SmallVector<unsigned, 8> PhysRegs;
   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
-                             AssignFnVarArg, true);
+                             AssignFnVarArg, true, FPDiff);
   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
     return false;

+  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
+  // sequence start and end here.
+  if (!IsSibCall) {
+    MIB->getOperand(1).setImm(FPDiff);
+    CallSeqStart.addImm(NumBytes).addImm(0);
+    // End the call sequence *before* emitting the call. Normally, we would
+    // tidy the frame up after the call. However, here, we've laid out the
+    // parameters so that when SP is reset, they will be in the correct
+    // location.
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
+  }
+
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);
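To make the FPDiff bookkeeping above concrete, here is a hedged, illustrative IR example (the numbers follow from the code, but this is not one of the patch's tests). Compiled with -tailcallopt, the caller below passes no stack arguments, so its reusable incoming argument area (NumReusableBytes) is 0, while the callee needs 8 bytes of stack arguments, which rounds up to NumBytes = 16. FPDiff is therefore 0 - 16 = -16: the tail call needs 16 bytes more than the caller's own argument area provides.

declare fastcc void @needs_stack([8 x i64], i64)

; NumReusableBytes = 0, NumBytes = alignTo(8, 16) = 16, FPDiff = -16.
define fastcc void @no_stack_args() {
  tail call fastcc void @needs_stack([8 x i64] undef, i64 1)
  ret void
}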
@@ -765,12 +847,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }

-  if (Info.IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
-    // TODO: Until we lower all tail calls, we should fall back on this.
-    LLVM_DEBUG(dbgs() << "Cannot handle -tailcallopt yet.\n");
-    return false;
-  }
-
   SmallVector<ArgInfo, 8> OutArgs;
   for (auto &OrigArg : Info.OrigArgs) {
     splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
@@ -848,10 +924,16 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }

+  uint64_t CalleePopBytes =
+      doesCalleeRestoreStack(Info.CallConv,
+                             MF.getTarget().Options.GuaranteedTailCallOpt)
+          ? alignTo(Handler.StackSize, 16)
+          : 0;
+
   CallSeqStart.addImm(Handler.StackSize).addImm(0);
   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
       .addImm(Handler.StackSize)
-      .addImm(0);
+      .addImm(CalleePopBytes);

   return true;
 }
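The last hunk makes an ordinary (non-tail) call under -tailcallopt report how many bytes the callee itself pops: alignTo(StackSize, 16) when the callee restores its own argument area (fastcc with -tailcallopt), 0 otherwise. A small hedged sketch (names are illustrative, not from the patch's tests): the call below passes 8 bytes on the stack, so CalleePopBytes becomes 16.

declare fastcc void @pops_own_args([8 x i64], i64)

; The fastcc callee pops its 16-byte-aligned argument area on return.
define void @ordinary_call() {
  call fastcc void @pops_own_args([8 x i64] undef, i64 0)
  ret void
}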
fastcc-reserved.ll

@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s

 ; This test is designed to be run in the situation where the
 ; call-frame is not reserved (hence disable-fp-elim), but where
tail-call.ll

@@ -1,5 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=SDAG,COMMON
-; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON

 declare fastcc void @callee_stack0()
 declare fastcc void @callee_stack8([8 x i64], i64)