mirror of
https://github.com/RPCS3/llvm.git
synced 2024-12-15 16:09:57 +00:00
Improve tail call optimized call's argument lowering. Before this
commit all arguments where moved to the stack slot where they would reside on a normal function call before the lowering to the tail call stack slot. This was done to prevent arguments overwriting each other. Now only arguments sourcing from a FORMAL_ARGUMENTS node or a CopyFromReg node with virtual register (could also be a caller's argument) are lowered indirectly. --This line, and those below, will be ignored-- M X86/X86ISelLowering.cpp M X86/README.txt git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@45867 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
719eb02255
commit
c8ab8cde43
@ -1330,10 +1330,11 @@ L5:
|
||||
|
||||
Tail call optimization improvements: Tail call optimization currently
|
||||
pushes all arguments on the top of the stack (their normal place for
|
||||
non-tail call optimized calls) before moving them to actual stack
|
||||
slot. This is done to prevent overwriting of parameters (see example
|
||||
below) that might be used, since the arguments of the callee
|
||||
overwrites caller's arguments.
|
||||
non-tail call optimized calls) that source from the callers arguments
|
||||
or that source from a virtual register (also possibly sourcing from
|
||||
callers arguments).
|
||||
This is done to prevent overwriting of parameters (see example
|
||||
below) that might be used later.
|
||||
|
||||
example:
|
||||
|
||||
@ -1352,13 +1353,6 @@ arg2 of the caller.
|
||||
|
||||
Possible optimizations:
|
||||
|
||||
- Only push those arguments to the top of the stack that are actual
|
||||
parameters of the caller function and have no local value in the
|
||||
caller.
|
||||
|
||||
In the above example local does not need to be pushed onto the top
|
||||
of the stack as it is definitely not a caller's function
|
||||
parameter.
|
||||
|
||||
- Analyse the actual parameters of the callee to see which would
|
||||
overwrite a caller parameter which is used by the callee and only
|
||||
@ -1380,35 +1374,6 @@ Possible optimizations:
|
||||
Here we need to push the arguments because they overwrite each
|
||||
other.
|
||||
|
||||
|
||||
Code for lowering directly onto callers arguments:
|
||||
+ SmallVector<std::pair<unsigned, SDOperand>, 8> RegsToPass;
|
||||
+ SmallVector<SDOperand, 8> MemOpChains;
|
||||
+
|
||||
+ SDOperand FramePtr;
|
||||
+ SDOperand PtrOff;
|
||||
+ SDOperand FIN;
|
||||
+ int FI = 0;
|
||||
+ // Walk the register/memloc assignments, inserting copies/loads.
|
||||
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
+ CCValAssign &VA = ArgLocs[i];
|
||||
+ SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||
+
|
||||
+ ....
|
||||
+
|
||||
+ if (VA.isRegLoc()) {
|
||||
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
+ } else {
|
||||
+ assert(VA.isMemLoc());
|
||||
+ // create frame index
|
||||
+ int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
||||
+ uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
||||
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||
+ FIN = DAG.getFrameIndex(FI, MVT::i32);
|
||||
+ // store relative to framepointer
|
||||
+ MemOpChains.push_back(DAG.getStore(Chain, Arg, FIN, NULL, 0));
|
||||
+ }
|
||||
+ }
|
||||
//===---------------------------------------------------------------------===//
|
||||
|
||||
main ()
|
||||
|
@ -1007,6 +1007,45 @@ X86TargetLowering::NameDecorationForFORMAL_ARGUMENTS(SDOperand Op) {
|
||||
return None;
|
||||
}
|
||||
|
||||
|
||||
// IsPossiblyOverriddenArgumentOfTailCall - Check if the operand could possibly
|
||||
// be overridden when lowering the outgoing arguments in a tail call. Currently
|
||||
// the implementation of this call is very conservative and assumes all
|
||||
// arguments sourcing from FORMAL_ARGUMENTS or a CopyFromReg with virtual
|
||||
// registers would be overridden by direct lowering.
|
||||
// Possible improvement:
|
||||
// Check FORMAL_ARGUMENTS corresponding MERGE_VALUES for CopyFromReg nodes
|
||||
// indicating inreg passed arguments which also need not be lowered to a safe
|
||||
// stack slot.
|
||||
static bool IsPossiblyOverriddenArgumentOfTailCall(SDOperand Op) {
|
||||
RegisterSDNode * OpReg = NULL;
|
||||
if (Op.getOpcode() == ISD::FORMAL_ARGUMENTS ||
|
||||
(Op.getOpcode()== ISD::CopyFromReg &&
|
||||
(OpReg = cast<RegisterSDNode>(Op.getOperand(1))) &&
|
||||
OpReg->getReg() >= MRegisterInfo::FirstVirtualRegister))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// GetMemCpyWithFlags - Create a MemCpy using function's parameter flag.
|
||||
static SDOperand
|
||||
GetMemCpyWithFlags(SelectionDAG &DAG, unsigned Flags, SDOperand From,
|
||||
SDOperand To, SDOperand Chain) {
|
||||
|
||||
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
||||
ISD::ParamFlags::ByValAlignOffs);
|
||||
|
||||
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
||||
ISD::ParamFlags::ByValSizeOffs;
|
||||
|
||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
||||
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
|
||||
|
||||
return DAG.getMemcpy(Chain, To, From, SizeNode, AlignNode,
|
||||
AlwaysInline);
|
||||
}
|
||||
|
||||
SDOperand X86TargetLowering::LowerMemArgument(SDOperand Op, SelectionDAG &DAG,
|
||||
const CCValAssign &VA,
|
||||
MachineFrameInfo *MFI,
|
||||
@ -1221,18 +1260,7 @@ X86TargetLowering::LowerMemOpCallTo(SDOperand Op, SelectionDAG &DAG,
|
||||
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
||||
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
||||
if (Flags & ISD::ParamFlags::ByVal) {
|
||||
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
||||
ISD::ParamFlags::ByValAlignOffs);
|
||||
|
||||
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
||||
ISD::ParamFlags::ByValSizeOffs;
|
||||
|
||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
||||
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i32);
|
||||
|
||||
return DAG.getMemcpy(Chain, PtrOff, Arg, SizeNode, AlignNode,
|
||||
AlwaysInline);
|
||||
return GetMemCpyWithFlags(DAG, Flags, Arg, PtrOff, Chain);
|
||||
} else {
|
||||
return DAG.getStore(Chain, Arg, PtrOff, NULL, 0);
|
||||
}
|
||||
@ -1306,9 +1334,9 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
||||
|
||||
SDOperand StackPtr;
|
||||
|
||||
// Walk the register/memloc assignments, inserting copies/loads.
|
||||
// For tail calls, lower arguments first to the stack slot where they would
|
||||
// normally - in case of a normal function call - be.
|
||||
// Walk the register/memloc assignments, inserting copies/loads. For tail
|
||||
// calls, lower arguments which could otherwise be possibly overwritten to the
|
||||
// stack slot where they would go on normal function calls.
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||
@ -1331,12 +1359,14 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
||||
if (VA.isRegLoc()) {
|
||||
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
|
||||
} else {
|
||||
assert(VA.isMemLoc());
|
||||
if (StackPtr.Val == 0)
|
||||
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
|
||||
|
||||
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
|
||||
Arg));
|
||||
if (!IsTailCall || IsPossiblyOverriddenArgumentOfTailCall(Arg)) {
|
||||
assert(VA.isMemLoc());
|
||||
if (StackPtr.Val == 0)
|
||||
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
|
||||
|
||||
MemOpChains.push_back(LowerMemOpCallTo(Op, DAG, StackPtr, VA, Chain,
|
||||
Arg));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1390,52 +1420,45 @@ SDOperand X86TargetLowering::LowerCALL(SDOperand Op, SelectionDAG &DAG) {
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
// Copy from stack slots to stack slot of a tail called function. This needs
|
||||
// to be done because if we would lower the arguments directly to their real
|
||||
// stack slot we might end up overwriting each other.
|
||||
// TODO: To make this more efficient (sometimes saving a store/load) we could
|
||||
// analyse the arguments and emit this store/load/store sequence only for
|
||||
// arguments which would be overwritten otherwise.
|
||||
// For tail calls lower the arguments to the 'real' stack slot.
|
||||
if (IsTailCall) {
|
||||
SmallVector<SDOperand, 8> MemOpChains2;
|
||||
SDOperand PtrOff;
|
||||
SDOperand FIN;
|
||||
int FI = 0;
|
||||
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
|
||||
CCValAssign &VA = ArgLocs[i];
|
||||
if (!VA.isRegLoc()) {
|
||||
assert(VA.isMemLoc());
|
||||
SDOperand Arg = Op.getOperand(5+2*VA.getValNo());
|
||||
SDOperand FlagsOp = Op.getOperand(6+2*VA.getValNo());
|
||||
unsigned Flags = cast<ConstantSDNode>(FlagsOp)->getValue();
|
||||
|
||||
// Get source stack slot.
|
||||
SDOperand PtrOff = DAG.getConstant(VA.getLocMemOffset(),
|
||||
getPointerTy());
|
||||
PtrOff = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, PtrOff);
|
||||
// Create frame index.
|
||||
int32_t Offset = VA.getLocMemOffset()+FPDiff;
|
||||
uint32_t OpSize = (MVT::getSizeInBits(VA.getLocVT())+7)/8;
|
||||
FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
|
||||
FIN = DAG.getFrameIndex(FI, MVT::i32);
|
||||
SDOperand Source = Arg;
|
||||
if (IsPossiblyOverriddenArgumentOfTailCall(Arg)){
|
||||
// Copy from stack slots to stack slot of a tail called function. This
|
||||
// needs to be done because if we would lower the arguments directly
|
||||
// to their real stack slot we might end up overwriting each other.
|
||||
// Get source stack slot.
|
||||
Source = DAG.getConstant(VA.getLocMemOffset(), getPointerTy());
|
||||
if (StackPtr.Val == 0)
|
||||
StackPtr = DAG.getCopyFromReg(Chain, X86StackPtr, getPointerTy());
|
||||
Source = DAG.getNode(ISD::ADD, getPointerTy(), StackPtr, Source);
|
||||
if ((Flags & ISD::ParamFlags::ByVal)==0)
|
||||
Source = DAG.getLoad(VA.getValVT(), Chain, Source,NULL, 0);
|
||||
}
|
||||
|
||||
if (Flags & ISD::ParamFlags::ByVal) {
|
||||
// Copy relative to framepointer.
|
||||
unsigned Align = 1 << ((Flags & ISD::ParamFlags::ByValAlign) >>
|
||||
ISD::ParamFlags::ByValAlignOffs);
|
||||
|
||||
unsigned Size = (Flags & ISD::ParamFlags::ByValSize) >>
|
||||
ISD::ParamFlags::ByValSizeOffs;
|
||||
|
||||
SDOperand AlignNode = DAG.getConstant(Align, MVT::i32);
|
||||
SDOperand SizeNode = DAG.getConstant(Size, MVT::i32);
|
||||
SDOperand AlwaysInline = DAG.getConstant(1, MVT::i1);
|
||||
|
||||
MemOpChains2.push_back(DAG.getMemcpy(Chain, FIN, PtrOff, SizeNode,
|
||||
AlignNode,AlwaysInline));
|
||||
// Copy relative to framepointer.
|
||||
MemOpChains2.
|
||||
push_back(GetMemCpyWithFlags(DAG, Flags, Source, FIN, Chain));
|
||||
} else {
|
||||
SDOperand LoadedArg = DAG.getLoad(VA.getValVT(), Chain, PtrOff,
|
||||
NULL, 0);
|
||||
// Store relative to framepointer.
|
||||
MemOpChains2.push_back(DAG.getStore(Chain, LoadedArg, FIN, NULL, 0));
|
||||
}
|
||||
// Store relative to framepointer.
|
||||
MemOpChains2.push_back(DAG.getStore(Chain, Source, FIN, NULL, 0));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user