[AArch64][GlobalISel] Support -tailcallopt

This adds support for `-tailcallopt` tail calls to CallLowering. This
piggy-backs off the changes from D67577, since doing it without a bit of
refactoring gets extremely ugly.

Support is basically ported from AArch64ISelLowering. The main difference here
is that tail calls in `-tailcallopt` change the ABI, so there's some extra
bookkeeping for the stack.
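
For illustration, a minimal sketch (not part of the patch) in the style of
tail-call.ll: under `-tailcallopt`, a `fastcc` tail call must be lowered as a
tail call even when the callee takes stack arguments, because the callee pops
its own argument area:

    declare fastcc void @callee_stack8([8 x i64], i64)

    define fastcc void @caller([8 x i64] %regs, i64 %n) {
      ; The [8 x i64] fills x0-x7, so %n is passed on the stack. With
      ; -tailcallopt this must still be a tail call; the callee pops its own
      ; stack arguments, so caller and callee argument areas may differ.
      tail call fastcc void @callee_stack8([8 x i64] %regs, i64 %n)
      ret void
    }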

Show that we are correctly lowering these by updating tail-call.ll.

Also show that we don't do anything strange in general by updating
fastcc-reserved.ll, which passes `-tailcallopt`, but doesn't emit any tail
calls.

Differential Revision: https://reviews.llvm.org/D67580

llvm-svn: 372177
Jessica Paquette 2019-09-17 20:24:23 +00:00
parent 02bf2ff5f8
commit a00d53313e
3 changed files with 107 additions and 24 deletions

llvm/lib/Target/AArch64/AArch64CallLowering.cpp

@@ -130,9 +130,11 @@ struct CallReturnHandler : public IncomingArgHandler {
 struct OutgoingArgHandler : public CallLowering::ValueHandler {
   OutgoingArgHandler(MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI,
                      MachineInstrBuilder MIB, CCAssignFn *AssignFn,
-                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false)
+                     CCAssignFn *AssignFnVarArg, bool IsTailCall = false,
+                     int FPDiff = 0)
       : ValueHandler(MIRBuilder, MRI, AssignFn), MIB(MIB),
-        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), StackSize(0) {}
+        AssignFnVarArg(AssignFnVarArg), IsTailCall(IsTailCall), FPDiff(FPDiff),
+        StackSize(0) {}
 
   Register getStackAddress(uint64_t Size, int64_t Offset,
                            MachinePointerInfo &MPO) override {
@@ -141,8 +143,7 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
     LLT s64 = LLT::scalar(64);
 
     if (IsTailCall) {
-      // TODO: For -tailcallopt tail calls, Offset will need FPDiff like in
-      // ISelLowering.
+      Offset += FPDiff;
       int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset, true);
       Register FIReg = MRI.createGenericVirtualRegister(p0);
       MIRBuilder.buildFrameIndex(FIReg, FI);
@@ -201,10 +202,18 @@ struct OutgoingArgHandler : public CallLowering::ValueHandler {
   MachineInstrBuilder MIB;
   CCAssignFn *AssignFnVarArg;
   bool IsTailCall;
+
+  /// For tail calls, the byte offset of the call's argument area from the
+  /// callee's. Unused elsewhere.
+  int FPDiff;
   uint64_t StackSize;
 };
 } // namespace
 
+static bool doesCalleeRestoreStack(CallingConv::ID CallConv, bool TailCallOpt) {
+  return CallConv == CallingConv::Fast && TailCallOpt;
+}
+
 void AArch64CallLowering::splitToValueTypes(
     const ArgInfo &OrigArg, SmallVectorImpl<ArgInfo> &SplitArgs,
     const DataLayout &DL, MachineRegisterInfo &MRI, CallingConv::ID CallConv) const {
@@ -408,9 +417,21 @@ bool AArch64CallLowering::lowerFormalArguments(
     FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
   }
 
-  // TODO: Port checks for stack to restore for -tailcallopt from ISelLowering.
-  // We need to keep track of the size of function stacks for tail call
-  // optimization. When we tail call, we need to check if the callee's arguments
+  if (doesCalleeRestoreStack(F.getCallingConv(),
+                             MF.getTarget().Options.GuaranteedTailCallOpt)) {
+    // We have a non-standard ABI, so why not make full use of the stack that
+    // we're going to pop? It must be aligned to 16 B in any case.
+    StackOffset = alignTo(StackOffset, 16);
+
+    // If we're expected to restore the stack (e.g. fastcc), then we'll be
+    // adding a multiple of 16.
+    FuncInfo->setArgumentStackToRestore(StackOffset);
+
+    // Our own callers will guarantee that the space is free by giving an
+    // aligned value to CALLSEQ_START.
+  }
+
+  // When we tail call, we need to check if the callee's arguments
   // will fit on the caller's stack. So, whenever we lower formal arguments,
   // we should keep track of this information, since we might lower a tail call
   // in this function later.
@@ -639,9 +660,12 @@ bool AArch64CallLowering::isEligibleForTailCallOptimization(
     }
   }
 
-  // If we have -tailcallopt and matching CCs, at this point, we could return
-  // true. However, we don't have full tail call support yet. So, continue
-  // checking. We want to emit a sibling call.
+  // If we have -tailcallopt, then we're done.
+  if (MF.getTarget().Options.GuaranteedTailCallOpt)
+    return canGuaranteeTCO(CalleeCC) && CalleeCC == CallerF.getCallingConv();
+
+  // We don't have -tailcallopt, so we're allowed to change the ABI (sibcall).
+  // Try to find cases where we can do that.
 
   // I want anyone implementing a new calling convention to think long and hard
   // about this assert.
@@ -695,6 +719,9 @@ bool AArch64CallLowering::lowerTailCall(
   MachineRegisterInfo &MRI = MF.getRegInfo();
   const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
 
+  // True when we're tail calling, but without -tailcallopt.
+  bool IsSibCall = !MF.getTarget().Options.GuaranteedTailCallOpt;
+
   // TODO: Right now, regbankselect doesn't know how to handle the rtcGPR64
   // register class. Until we can do that, we should fall back here.
   if (F.hasFnAttribute("branch-target-enforcement")) {
@@ -704,18 +731,22 @@ bool AArch64CallLowering::lowerTailCall(
   }
 
   // Find out which ABI gets to decide where things go.
+  CallingConv::ID CalleeCC = Info.CallConv;
   CCAssignFn *AssignFnFixed =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/false);
+      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/false);
   CCAssignFn *AssignFnVarArg =
-      TLI.CCAssignFnForCall(Info.CallConv, /*IsVarArg=*/true);
+      TLI.CCAssignFnForCall(CalleeCC, /*IsVarArg=*/true);
+
+  MachineInstrBuilder CallSeqStart;
+  if (!IsSibCall)
+    CallSeqStart = MIRBuilder.buildInstr(AArch64::ADJCALLSTACKDOWN);
 
   unsigned Opc = getCallOpcode(F, Info.Callee.isReg(), true);
   auto MIB = MIRBuilder.buildInstrNoInsert(Opc);
   MIB.add(Info.Callee);
 
-  // Add the byte offset for the tail call. We only have sibling calls, so this
-  // is always 0.
-  // TODO: Handle tail calls where we will have a different value here.
+  // Byte offset for the tail call. When we are sibcalling, this will always
+  // be 0.
   MIB.addImm(0);
 
   // Tell the call which registers are clobbered.
@@ -728,13 +759,64 @@ bool AArch64CallLowering::lowerTailCall(
   if (TRI->isAnyArgRegReserved(MF))
     TRI->emitReservedArgRegCallError(MF);
 
+  // FPDiff is the byte offset of the call's argument area from the callee's.
+  // Stores to callee stack arguments will be placed in FixedStackSlots offset
+  // by this amount for a tail call. In a sibling call it must be 0 because the
+  // caller will deallocate the entire stack and the callee still expects its
+  // arguments to begin at SP+0.
+  int FPDiff = 0;
+
+  // This will be 0 for sibcalls, potentially nonzero for tail calls produced
+  // by -tailcallopt. For sibcalls, the memory operands for the call are
+  // already available in the caller's incoming argument space.
+  unsigned NumBytes = 0;
+  if (!IsSibCall) {
+    // We aren't sibcalling, so we need to compute FPDiff. We need to do this
+    // before handling assignments, because FPDiff must be known for memory
+    // arguments.
+    AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
+    unsigned NumReusableBytes = FuncInfo->getBytesInStackArgArea();
+    SmallVector<CCValAssign, 16> OutLocs;
+    CCState OutInfo(CalleeCC, false, MF, OutLocs, F.getContext());
+    analyzeArgInfo(OutInfo, OutArgs,
+                   *TLI.CCAssignFnForCall(CalleeCC, Info.IsVarArg));
+
+    // The callee will pop the argument stack as a tail call. Thus, we must
+    // keep it 16-byte aligned.
+    NumBytes = alignTo(OutInfo.getNextStackOffset(), 16);
+
+    // FPDiff will be negative if this tail call requires more space than we
+    // would automatically have in our incoming argument space. Positive if we
+    // actually shrink the stack.
+    FPDiff = NumReusableBytes - NumBytes;
+
+    // The stack pointer must be 16-byte aligned at all times it's used for a
+    // memory operation, which in practice means at *all* times and in
+    // particular across call boundaries. Therefore our own arguments started
+    // at a 16-byte aligned SP and the delta applied for the tail call should
+    // satisfy the same constraint.
+    assert(FPDiff % 16 == 0 && "unaligned stack on tail call");
+  }
+
   // Do the actual argument marshalling.
   SmallVector<unsigned, 8> PhysRegs;
   OutgoingArgHandler Handler(MIRBuilder, MRI, MIB, AssignFnFixed,
-                             AssignFnVarArg, true);
+                             AssignFnVarArg, true, FPDiff);
   if (!handleAssignments(MIRBuilder, OutArgs, Handler))
     return false;
 
+  // If we have -tailcallopt, we need to adjust the stack. We'll do the call
+  // sequence start and end here.
+  if (!IsSibCall) {
+    MIB->getOperand(1).setImm(FPDiff);
+    CallSeqStart.addImm(NumBytes).addImm(0);
+
+    // End the call sequence *before* emitting the call. Normally, we would
+    // tidy the frame up after the call. However, here, we've laid out the
+    // parameters so that when SP is reset, they will be in the correct
+    // location.
+    MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP).addImm(NumBytes).addImm(0);
+  }
+
   // Now we can add the actual call instruction to the correct basic block.
   MIRBuilder.insertInstr(MIB);
@@ -765,12 +847,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     return false;
   }
 
-  if (Info.IsTailCall && MF.getTarget().Options.GuaranteedTailCallOpt) {
-    // TODO: Until we lower all tail calls, we should fall back on this.
-    LLVM_DEBUG(dbgs() << "Cannot handle -tailcallopt yet.\n");
-    return false;
-  }
-
   SmallVector<ArgInfo, 8> OutArgs;
   for (auto &OrigArg : Info.OrigArgs) {
     splitToValueTypes(OrigArg, OutArgs, DL, MRI, Info.CallConv);
@@ -848,10 +924,16 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
     MIRBuilder.buildCopy(Info.SwiftErrorVReg, Register(AArch64::X21));
   }
 
+  uint64_t CalleePopBytes =
+      doesCalleeRestoreStack(Info.CallConv,
+                             MF.getTarget().Options.GuaranteedTailCallOpt)
+          ? alignTo(Handler.StackSize, 16)
+          : 0;
+
   CallSeqStart.addImm(Handler.StackSize).addImm(0);
   MIRBuilder.buildInstr(AArch64::ADJCALLSTACKUP)
       .addImm(Handler.StackSize)
-      .addImm(0);
+      .addImm(CalleePopBytes);
 
   return true;
 }
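
A worked example of the FPDiff arithmetic above (hypothetical, reusing the
declarations from tail-call.ll): a `fastcc` caller with no incoming stack
arguments tail-calling a callee that needs 8 bytes of stack:

    declare fastcc void @callee_stack8([8 x i64], i64)

    define fastcc void @caller() {
      ; NumReusableBytes = 0: this caller received no stack arguments.
      ; The callee's i64 needs 8 bytes of stack; NumBytes = alignTo(8, 16) = 16.
      ; FPDiff = 0 - 16 = -16, i.e. the tail call needs 16 bytes more than the
      ; incoming argument area provides, and the 16-byte alignment assert holds.
      tail call fastcc void @callee_stack8([8 x i64] undef, i64 42)
      ret void
    }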

llvm/test/CodeGen/AArch64/fastcc-reserved.ll

@@ -1,4 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s
 
 ; This test is designed to be run in the situation where the
 ; call-frame is not reserved (hence disable-fp-elim), but where

llvm/test/CodeGen/AArch64/tail-call.ll

@@ -1,5 +1,5 @@
 ; RUN: llc -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=SDAG,COMMON
-; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
+; RUN: llc -global-isel -global-isel-abort=1 -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu -tailcallopt | FileCheck %s --check-prefixes=GISEL,COMMON
 
 declare fastcc void @callee_stack0()
 declare fastcc void @callee_stack8([8 x i64], i64)
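
For the non-tail-call path changed above, a sketch under the same assumptions:
with `-tailcallopt`, a plain `fastcc` call expects the callee to pop its own
argument stack, so ADJCALLSTACKUP is given a nonzero CalleePopBytes:

    define void @caller_no_tail() {
      ; Handler.StackSize = 8 for the i64 stack argument; since the fastcc
      ; callee restores the stack, CalleePopBytes = alignTo(8, 16) = 16.
      call fastcc void @callee_stack8([8 x i64] undef, i64 42)
      ret void
    }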