mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-15 16:09:57 +00:00
Improve tail call optimized call's argument lowering. Before this
commit all arguments were moved to the stack slot where they would reside on a normal function call before the lowering to the tail call stack slot. This was done to prevent arguments overwriting each other. Now only arguments sourcing from a FORMAL_ARGUMENTS node or a CopyFromReg node with a virtual register (could also be a caller's argument) are lowered indirectly. --This line, and those below, will be ignored-- M X86/X86ISelLowering.cpp M X86/README.txt git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45867 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
719eb02255
commit
c8ab8cde43
@ -1330,10 +1330,11 @@ L5:
|
|||||||
|
|
||||||
Tail call optimization improvements: Tail call optimization currently
|
Tail call optimization improvements: Tail call optimization currently
|
||||||
pushes all arguments on the top of the stack (their normal place for
|
pushes all arguments on the top of the stack (their normal place for
|
||||||
non-tail call optimized calls) before moving them to actual stack
|
non-tail call optimized calls) that source from the caller's arguments
|
||||||
slot. This is done to prevent overwriting of parameters (see example
|
or that source from a virtual register (also possibly sourcing from
|
||||||
below) that might be used, since the arguments of the callee
|
caller's arguments).
|
||||||
overwrite the caller's arguments.
|
This is done to prevent overwriting of parameters (see example
|
||||||
|
below) that might be used later.
|
||||||
|
|
||||||
example:
|
example:
|
||||||
|
|
||||||
@ -1352,13 +1353,6 @@ arg2 of the caller.
|
|||||||
|
|
||||||
Possible optimizations:
|
Possible optimizations:
|
||||||
|
|
||||||
- Only push those arguments to the top of the stack that are actual
|
|
||||||
parameters of the caller function and have no local value in the
|
|
||||||
caller.
|
|
||||||
|
|
||||||
In the above example local does not need to be pushed onto the top
|
|
||||||
of the stack as it is definitely not a caller's function
|
|
||||||
parameter.
|
|
||||||
|
|
||||||
- Analyse the actual parameters of the callee to see which would
|
- Analyse the actual parameters of the callee to see which would
|
||||||
overwrite a caller parameter which is used by the callee and only
|
overwrite a caller parameter which is used by the callee and only
|
||||||
@ -1380,35 +1374,6 @@ Possible optimizations:
|
|||||||
Here we need to push the arguments because they overwrite each
|
Here we need to push the arguments because they overwrite each
|
||||||
other.
|
other.
|
||||||
|
|
||||||
|
|
||||||
Code for lowering directly onto the caller's arguments:
|
|
||||||
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
|
|
||||||
+ SmallVector<SDOperand, 8> MemOpChains;
|
|
||||||
+
|
|
||||||
+ SDOperand FramePtr;
|
|
||||||
+ SDOperand PtrOff;
|
|
||||||
+ SDOperand FIN;
|
|
||||||
+ int FI = 0;
|
|
||||||
+ // Walk the register/memloc assignments, inserting copies/loads.
|
|
||||||
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
|
||||||
+ CCValAssign &VA = ArgLocs[i];
|
|
||||||
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
|
||||||
+
|
|
||||||
+ ....
|
|
||||||
+
|
|
||||||
+ if (VA.isRegLoc()) {
|
|
||||||
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
|
||||||
+ } else {
|
|
||||||
+ assert(VA.isMemLoc());
|
|
||||||
+ // create frame index
|
|
||||||
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
|
||||||
+ uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
|
||||||
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
|
||||||
+ FIN = DAG.getFrameIndex(FI, MVT::i32);
|
|
||||||
+ // store relative to framepointer
|
|
||||||
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
|
|
||||||
+ }
|
|
||||||
+ }
|
|
||||||
//===---------------------------------------------------------------------===//
|
//===---------------------------------------------------------------------===//
|
||||||
|
|
||||||
main ()
|
main ()
|
||||||
|
@ -1007,6 +1007,45 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
|
|||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// IsPossiblyOverriddenArgumentOfTailCall - Check if the operand could possibly
|
||||||
|
// be overwritten when lowering the outgoing arguments in a tail call. Currently
|
||||||
|
// the implementation of this check is very conservative and assumes that all
|
||||||
|
// arguments sourcing from FORMAL_ARGUMENTS or from a CopyFromReg of a virtual
|
||||||
|
// register would be overwritten by direct lowering.
|
||||||
|
// Possible improvement:
|
||||||
|
// Check the MERGE_VALUES node corresponding to FORMAL_ARGUMENTS for CopyFromReg
|
||||||
|
// nodes indicating inreg passed arguments, which also need not be lowered to a
|
||||||
|
// safe stack slot.
// Returns true when Op must first be lowered to a safe stack slot, false when
// it can be lowered directly.
|
||||||
|
static bool IsPossiblyOverriddenArgumentOfTailCall(SDOperand Op) {
|
||||||
|
// Only assigned in the CopyFromReg branch below (short-circuit evaluation).
RegisterSDNode * OpReg = NULL;
|
||||||
|
// Case 1: the operand is the FORMAL_ARGUMENTS node itself.
if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
|
||||||
|
// Case 2: the operand is a CopyFromReg ...
(Op.getOpcode()== ISD::CopyFromReg &&
|
||||||
|
// ... whose operand 1 is the register node being read ...
(OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
|
||||||
|
// ... and that register number lies in the virtual register range.
OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
|
||||||
|
return true;
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetMemCpyWithFlags - Create a MemCpy node whose size and alignment are
// decoded from a function parameter's flag word.
//   Flags - packed parameter flags; the ByValAlign and ByValSize bit fields
//           are read from it.
//   From  - source operand of the copy.
//   To    - destination operand of the copy.
//   Chain - chain operand handed to the memcpy node.
// Returns the SDOperand produced by DAG.getMemcpy.
|
||||||
|
static SDOperand
|
||||||
|
GetMemCpyWithFlags(SelectionDAG &DAG, unsigned Flags, SDOperand From,
|
||||||
|
SDOperand To, SDOperand Chain) {
|
||||||
|
|
||||||
|
// The ByValAlign field is used as a shift count, i.e. Align = 2^field.
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
||||||
|
ISD::ParamFlags::ByValAlignOffs);
|
||||||
|
|
||||||
|
// The ByValSize field is used directly as the copy size.
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
||||||
|
ISD::ParamFlags::ByValSizeOffs;
|
||||||
|
|
||||||
|
// Wrap the decoded values as i32 constant nodes for the memcpy operands.
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
||||||
|
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
||||||
|
// NOTE(review): constant 1 presumably requests inline expansion of the copy;
// confirm against DAG.getMemcpy.
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
|
||||||
|
|
||||||
|
// Note the operand order: destination (To) is passed before source (From).
return DAG.getMemcpy(Chain, To, From, SizeNode, AlignNode,
|
||||||
|
AlwaysInline);
|
||||||
|
}
|
||||||
|
|
||||||
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
|
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
|
||||||
const CCValAssign &VA,
|
const CCValAssign &VA,
|
||||||
MachineFrameInfo *MFI,
|
MachineFrameInfo *MFI,
|
||||||
@ -1221,18 +1260,7 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
|
|||||||
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
||||||
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
||||||
if (Flags & ISD::ParamFlags::ByVal) {
|
if (Flags & ISD::ParamFlags::ByVal) {
|
||||||
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
return GetMemCpyWithFlags(DAG, Flags, Arg, PtrOff, Chain);
|
||||||
ISD::ParamFlags::ByValAlignOffs);
|
|
||||||
|
|
||||||
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
|
||||||
ISD::ParamFlags::ByValSizeOffs;
|
|
||||||
|
|
||||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
|
||||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
|
||||||
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
|
|
||||||
|
|
||||||
return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
|
|
||||||
AlwaysInline);
|
|
||||||
} else {
|
} else {
|
||||||
return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
|
return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
|
||||||
}
|
}
|
||||||
@ -1306,9 +1334,9 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
|
|
||||||
SDOperand StackPtr;
|
SDOperand StackPtr;
|
||||||
|
|
||||||
// Walk the register/memloc assignments, inserting copies/loads.
|
// Walk the register/memloc assignments, inserting copies/loads. For tail
|
||||||
// For tail calls, lower arguments first to the stack slot where they would
|
// calls, lower arguments which could otherwise be possibly overwritten to the
|
||||||
// normally - in case of a normal function call - be.
|
// stack slot where they would go on normal function calls.
|
||||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||||
CCValAssign &VA = ArgLocs[i];
|
CCValAssign &VA = ArgLocs[i];
|
||||||
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||||
@ -1331,12 +1359,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
if (VA.isRegLoc()) {
|
if (VA.isRegLoc()) {
|
||||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||||
} else {
|
} else {
|
||||||
assert(VA.isMemLoc());
|
if (!IsTailCall || IsPossiblyOverriddenArgumentOfTailCall(Arg)) {
|
||||||
if (StackPtr.Val == 0)
|
assert(VA.isMemLoc());
|
||||||
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
|
if (StackPtr.Val == 0)
|
||||||
|
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
|
||||||
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
|
|
||||||
Arg));
|
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
|
||||||
|
Arg));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1390,52 +1420,45 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
|||||||
InFlag = Chain.getValue(1);
|
InFlag = Chain.getValue(1);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Copy from stack slots to stack slot of a tail called function. This needs
|
// For tail calls lower the arguments to the 'real' stack slot.
|
||||||
// to be done because if we would lower the arguments directly to their real
|
|
||||||
// stack slot we might end up overwriting each other.
|
|
||||||
// TODO: To make this more efficient (sometimes saving a store/load) we could
|
|
||||||
// analyse the arguments and emit this store/load/store sequence only for
|
|
||||||
// arguments which would be overwritten otherwise.
|
|
||||||
if (IsTailCall) {
|
if (IsTailCall) {
|
||||||
SmallVector<SDOperand, 8> MemOpChains2;
|
SmallVector<SDOperand, 8> MemOpChains2;
|
||||||
SDOperand PtrOff;
|
|
||||||
SDOperand FIN;
|
SDOperand FIN;
|
||||||
int FI = 0;
|
int FI = 0;
|
||||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||||
CCValAssign &VA = ArgLocs[i];
|
CCValAssign &VA = ArgLocs[i];
|
||||||
if (!VA.isRegLoc()) {
|
if (!VA.isRegLoc()) {
|
||||||
|
assert(VA.isMemLoc());
|
||||||
|
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||||
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
||||||
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
||||||
|
|
||||||
// Get source stack slot.
|
|
||||||
SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(),
|
|
||||||
getPointerTy());
|
|
||||||
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
|
|
||||||
// Create frame index.
|
// Create frame index.
|
||||||
int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
||||||
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
||||||
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||||
FIN = DAG.getFrameIndex(FI, MVT::i32);
|
FIN = DAG.getFrameIndex(FI, MVT::i32);
|
||||||
|
SDOperand Source = Arg;
|
||||||
|
if (IsPossiblyOverriddenArgumentOfTailCall(Arg)){
|
||||||
|
// Copy from stack slots to stack slot of a tail called function. This
|
||||||
|
// needs to be done because if we would lower the arguments directly
|
||||||
|
// to their real stack slot we might end up overwriting each other.
|
||||||
|
// Get source stack slot.
|
||||||
|
Source = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
|
||||||
|
if (StackPtr.Val == 0)
|
||||||
|
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
|
||||||
|
Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
|
||||||
|
if ((Flags & ISD::ParamFlags::ByVal)==0)
|
||||||
|
Source = DAG.getLoad(VA.getValVT(), Chain, Source,NULL, 0);
|
||||||
|
}
|
||||||
|
|
||||||
if (Flags & ISD::ParamFlags::ByVal) {
|
if (Flags & ISD::ParamFlags::ByVal) {
|
||||||
// Copy relative to framepointer.
|
// Copy relative to framepointer.
|
||||||
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
MemOpChains2.
|
||||||
ISD::ParamFlags::ByValAlignOffs);
|
push_back(GetMemCpyWithFlags(DAG, Flags, Source, FIN, Chain));
|
||||||
|
|
||||||
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
|
||||||
ISD::ParamFlags::ByValSizeOffs;
|
|
||||||
|
|
||||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
|
||||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
|
||||||
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
|
|
||||||
|
|
||||||
MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
|
|
||||||
AlignNode,AlwaysInline));
|
|
||||||
} else {
|
} else {
|
||||||
SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
|
// Store relative to framepointer.
|
||||||
NULL, 0);
|
MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
|
||||||
// Store relative to framepointer.
|
}
|
||||||
MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user