Improve argument lowering for tail call optimized calls.

Before this commit, all arguments were first moved to the stack slot where
they would reside on a normal function call, and only afterwards copied to
their tail call stack slot. This was done to prevent arguments from
overwriting each other. Now only arguments sourcing from a FORMAL_ARGUMENTS
node or from a CopyFromReg node with a virtual register (which could also
hold a caller's argument) are lowered indirectly; everything else is stored
directly to its tail call stack slot.



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45867 91177308-0d34-0410-b5e6-96231b3b80d8
Arnold Schwaighofer 2008-01-11 16:49:42 +00:00
parent 719eb02255
commit c8ab8cde43
2 changed files with 79 additions and 91 deletions
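To make the hazard described in the commit message concrete, here is a small
standalone sketch; the two-slot stack model and every name in it are invented
for illustration and are not the LLVM classes touched by this patch. It mimics
lowering the tail call callee(arg2, arg1) either directly into the caller's
argument slots or via a scratch copy first:

// Standalone illustration (not LLVM code): why outgoing arguments that source
// from the caller's own incoming arguments must be staged before being stored
// into their final tail call stack slots.
#include <array>
#include <cassert>

using ArgSlots = std::array<int, 2>;  // models the caller's argument area,
                                      // which the tail call reuses

// Direct lowering of callee(arg2, arg1): each outgoing argument is stored
// straight into its final slot, so the second store reads a slot that the
// first store has already clobbered.
ArgSlots lowerDirectly(ArgSlots slots) {
  slots[0] = slots[1];  // callee's first argument  <- caller's arg2
  slots[1] = slots[0];  // callee's second argument <- should be caller's arg1,
                        // but slot 0 was already overwritten above
  return slots;
}

// Indirect lowering: values sourcing from the argument area are first read
// into scratch storage (standing in for the "normal call" stack slots) and
// only then stored to the final slots.
ArgSlots lowerIndirectly(ArgSlots slots) {
  ArgSlots scratch = slots;
  slots[0] = scratch[1];
  slots[1] = scratch[0];
  return slots;
}

int main() {
  ArgSlots incoming{1, 2};  // the caller itself was called with arg1=1, arg2=2
  assert((lowerIndirectly(incoming) == ArgSlots{2, 1}));  // correct swap
  assert((lowerDirectly(incoming) == ArgSlots{2, 2}));    // arg1 was lost
  return 0;
}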

X86/README.txt

@@ -1330,10 +1330,11 @@ L5:
 
 Tail call optimization improvements: Tail call optimization currently
 pushes all arguments on the top of the stack (their normal place for
-non-tail call optimized calls) before moving them to actual stack
-slot. This is done to prevent overwriting of parameters (see example
-below) that might be used, since the arguments of the callee
-overwrites caller's arguments.
+non-tail call optimized calls) that source from the callers arguments
+or that source from a virtual register (also possibly sourcing from
+callers arguments).
+This is done to prevent overwriting of parameters (see example
+below) that might be used later.
 
 example:
@@ -1352,13 +1353,6 @@ arg2 of the caller.
 
 Possible optimizations:
 
-- Only push those arguments to the top of the stack that are actual
-  parameters of the caller function and have no local value in the
-  caller.
-  In the above example local does not need to be pushed onto the top
-  of the stack as it is definitely not a caller's function
-  parameter.
 - Analyse the actual parameters of the callee to see which would
   overwrite a caller parameter which is used by the callee and only
@@ -1380,35 +1374,6 @@ Possible optimizations:
   Here we need to push the arguments because they overwrite each
   other.
 
-  Code for lowering directly onto callers arguments:
-+  SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
-+  SmallVector<SDOperand, 8> MemOpChains;
-+
-+  SDOperand FramePtr;
-+  SDOperand PtrOff;
-+  SDOperand FIN;
-+  int FI = 0;
-+  // Walk the register/memloc assignments, inserting copies/loads.
-+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
-+    CCValAssign &VA = ArgLocs[i];
-+    SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
-+
-+    ....
-+
-+    if (VA.isRegLoc()) {
-+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
-+    } else {
-+      assert(VA.isMemLoc());
-+      // create frame index
-+      int32_t Offset = VA.getLocMemOffset()+FPDiff;
-+      uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
-+      FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
-+      FIN = DAG.getFrameIndex(FI, MVT::i32);
-+      // store relative to framepointer
-+      MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
-+    }
-+  }
 
 //===---------------------------------------------------------------------===//
 
 main ()
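Modelled outside LLVM, the conservative "might this operand read the caller's
argument area?" test that this commit introduces looks roughly like the sketch
below. The enum, the register threshold, and all names are stand-ins invented
for this example; the real IsPossiblyOverriddenArgumentOfTailCall appears in
the X86ISelLowering.cpp diff that follows.

// Toy model (not LLVM code) of the conservative check: only operands for
// which this returns true need the extra staging store, everything else can
// be stored directly into its tail call stack slot.
#include <cassert>

enum class ValueSource { FormalArgument, CopyFromReg, Constant, Other };

struct Operand {
  ValueSource Source;
  unsigned Reg = 0;  // only meaningful for CopyFromReg
};

// Assumed cut-off between physical and virtual register numbers in this toy;
// the real code compares against MRegisterInfo::FirstVirtualRegister.
const unsigned FirstVirtualRegister = 1024;

bool mayBeOverriddenByDirectLowering(const Operand &Op) {
  if (Op.Source == ValueSource::FormalArgument)
    return true;   // definitely lives in the caller's argument area
  if (Op.Source == ValueSource::CopyFromReg && Op.Reg >= FirstVirtualRegister)
    return true;   // a virtual register may hold a copy of a caller argument
  return false;    // conservative "no" for everything else
}

int main() {
  Operand formal{ValueSource::FormalArgument};
  Operand vreg{ValueSource::CopyFromReg, 4096};
  Operand constant{ValueSource::Constant};
  assert(mayBeOverriddenByDirectLowering(formal));
  assert(mayBeOverriddenByDirectLowering(vreg));
  assert(!mayBeOverriddenByDirectLowering(constant));
  return 0;
}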

X86/X86ISelLowering.cpp

@@ -1007,6 +1007,45 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
   return None;
 }
 
+// IsPossiblyOverriddenArgumentOfTailCall - Check if the operand could possibly
+// be overridden when lowering the outgoing arguments in a tail call. Currently
+// the implementation of this call is very conservative and assumes all
+// arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with virtual
+// registers would be overridden by direct lowering.
+// Possible improvement:
+// Check FORMAL_ARGUMENTS corresponding MERGE_VALUES for CopyFromReg nodes
+// indicating inreg passed arguments which also need not be lowered to a safe
+// stack slot.
+static bool IsPossiblyOverriddenArgumentOfTailCall(SDOperand Op) {
+  RegisterSDNode * OpReg = NULL;
+  if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
+      (Op.getOpcode()== ISD::CopyFromReg &&
+       (OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
+       OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
+    return true;
+  return false;
+}
+
+// GetMemCpyWithFlags - Create a MemCpy using function's parameter flag.
+static SDOperand
+GetMemCpyWithFlags(SelectionDAG &DAG, unsigned Flags, SDOperand From,
+                   SDOperand To, SDOperand Chain) {
+  unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
+                         ISD::ParamFlags::ByValAlignOffs);
+  unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
+                  ISD::ParamFlags::ByValSizeOffs;
+  SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
+  SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
+  SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
+  return DAG.getMemcpy(Chain, To, From, SizeNode, AlignNode,
+                       AlwaysInline);
+}
+
 SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
                                               const CCValAssign &VA,
                                               MachineFrameInfo *MFI,
@@ -1221,18 +1260,7 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
   SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
   unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
   if (Flags & ISD::ParamFlags::ByVal) {
-    unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
-                           ISD::ParamFlags::ByValAlignOffs);
-    unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
-                    ISD::ParamFlags::ByValSizeOffs;
-    SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
-    SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
-    SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
-    return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
-                         AlwaysInline);
+    return GetMemCpyWithFlags(DAG, Flags, Arg, PtrOff, Chain);
   } else {
     return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
   }
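For context, GetMemCpyWithFlags (and the code it replaces here) unpacks a
byval argument's copy size and alignment from bit fields packed into the
parameter flags word. A standalone sketch of that decode is shown below; the
field positions and widths in ToyParamFlags are invented for the example and
do not reflect the real ISD::ParamFlags layout.

// Toy decode of a bit-packed parameter flags word (invented layout).
#include <cassert>
#include <cstdint>

namespace ToyParamFlags {
  const uint32_t ByVal          = 1u << 0;
  const uint32_t ByValAlignOffs = 1;                    // log2(alignment) field
  const uint32_t ByValAlign     = 0xFu << ByValAlignOffs;
  const uint32_t ByValSizeOffs  = 16;                   // size-in-bytes field
  const uint32_t ByValSize      = 0xFFFFu << ByValSizeOffs;
}

// Alignment is stored as a log2 value, size as a plain byte count, mirroring
// the shape of the decode in the patch.
void decodeByVal(uint32_t Flags, unsigned &Align, unsigned &Size) {
  Align = 1u << ((Flags & ToyParamFlags::ByValAlign) >>
                 ToyParamFlags::ByValAlignOffs);
  Size = (Flags & ToyParamFlags::ByValSize) >> ToyParamFlags::ByValSizeOffs;
}

int main() {
  // A hypothetical byval aggregate: 24 bytes, 8-byte aligned (log2 = 3).
  uint32_t Flags = ToyParamFlags::ByVal |
                   (3u << ToyParamFlags::ByValAlignOffs) |
                   (24u << ToyParamFlags::ByValSizeOffs);
  unsigned Align = 0, Size = 0;
  decodeByVal(Flags, Align, Size);
  assert(Align == 8 && Size == 24);
  return 0;
}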
@@ -1306,9 +1334,9 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
   SDOperand StackPtr;
 
-  // Walk the register/memloc assignments, inserting copies/loads.
-  // For tail calls, lower arguments first to the stack slot where they would
-  // normally - in case of a normal function call - be.
+  // Walk the register/memloc assignments, inserting copies/loads.  For tail
+  // calls, lower arguments which could otherwise be possibly overwritten to the
+  // stack slot where they would go on normal function calls.
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
@@ -1331,12 +1359,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
     if (VA.isRegLoc()) {
       RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
     } else {
-      assert(VA.isMemLoc());
-      if (StackPtr.Val == 0)
-        StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
-      MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
-                                             Arg));
+      if (!IsTailCall || IsPossiblyOverriddenArgumentOfTailCall(Arg)) {
+        assert(VA.isMemLoc());
+        if (StackPtr.Val == 0)
+          StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+        MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
+                                               Arg));
+      }
     }
   }
@@ -1390,52 +1420,45 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
     InFlag = Chain.getValue(1);
   }
 
-  // Copy from stack slots to stack slot of a tail called function. This needs
-  // to be done because if we would lower the arguments directly to their real
-  // stack slot we might end up overwriting each other.
-  // TODO: To make this more efficient (sometimes saving a store/load) we could
-  // analyse the arguments and emit this store/load/store sequence only for
-  // arguments which would be overwritten otherwise.
+  // For tail calls lower the arguments to the 'real' stack slot.
   if (IsTailCall) {
     SmallVector<SDOperand, 8> MemOpChains2;
-    SDOperand PtrOff;
     SDOperand FIN;
     int FI = 0;
     for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
       CCValAssign &VA = ArgLocs[i];
       if (!VA.isRegLoc()) {
         assert(VA.isMemLoc());
         SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
         SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
         unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
-        // Get source stack slot.
-        SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(),
-                                           getPointerTy());
-        PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
         // Create frame index.
         int32_t Offset = VA.getLocMemOffset()+FPDiff;
         uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
         FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
         FIN = DAG.getFrameIndex(FI, MVT::i32);
+        SDOperand Source = Arg;
+        if (IsPossiblyOverriddenArgumentOfTailCall(Arg)) {
+          // Copy from stack slots to stack slot of a tail called function. This
+          // needs to be done because if we would lower the arguments directly
+          // to their real stack slot we might end up overwriting each other.
+          // Get source stack slot.
+          Source = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
+          if (StackPtr.Val == 0)
+            StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
+          Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
+          if ((Flags & ISD::ParamFlags::ByVal)==0)
+            Source = DAG.getLoad(VA.getValVT(), Chain, Source, NULL, 0);
+        }
         if (Flags & ISD::ParamFlags::ByVal) {
           // Copy relative to framepointer.
-          unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
-                                 ISD::ParamFlags::ByValAlignOffs);
-          unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
-                          ISD::ParamFlags::ByValSizeOffs;
-          SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
-          SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
-          SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
-          MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
-                                               AlignNode, AlwaysInline));
+          MemOpChains2.
+            push_back(GetMemCpyWithFlags(DAG, Flags, Source, FIN, Chain));
         } else {
-          SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
-                                            NULL, 0);
           // Store relative to framepointer.
-          MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
+          MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
         }
       }
     }
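Taken together, the two loops above behave roughly like the following
standalone model; the flat offset-keyed stack, the OutgoingArg struct, and all
names are invented for illustration, and byval copies as well as the
SelectionDAG chain bookkeeping are ignored.

// Toy two-pass lowering (not LLVM code): pass 1 stages only the possibly
// overridden values at their normal-call offsets; pass 2 writes every
// outgoing argument to its tail call slot (normal offset + FPDiff), reading
// staged values back from the scratch area when necessary.
#include <cassert>
#include <map>
#include <vector>

struct OutgoingArg {
  int Value;            // the value to pass (already computed)
  int NormalOffset;     // stack offset the value would use on a normal call
  bool MayBeOverridden; // result of the conservative check
};

using Stack = std::map<int, int>;  // offset -> stored value

void lowerTailCallArgs(const std::vector<OutgoingArg> &Args, int FPDiff,
                       Stack &Mem) {
  // Pass 1: store possibly overridden arguments to their normal-call slots.
  for (const OutgoingArg &A : Args)
    if (A.MayBeOverridden)
      Mem[A.NormalOffset] = A.Value;

  // Pass 2: move every argument to its real tail call slot.
  for (const OutgoingArg &A : Args) {
    int TailCallOffset = A.NormalOffset + FPDiff;
    int Source = A.MayBeOverridden ? Mem[A.NormalOffset]  // reload staged copy
                                   : A.Value;             // store directly
    Mem[TailCallOffset] = Source;
  }
}

int main() {
  Stack Mem;
  std::vector<OutgoingArg> Args = {
      {7, 0, true},    // sources from a caller argument: must be staged first
      {42, 8, false},  // e.g. a constant: can be stored directly
  };
  lowerTailCallArgs(Args, /*FPDiff=*/16, Mem);
  assert(Mem[16] == 7 && Mem[24] == 42);
  return 0;
}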