Do not force indirect tailcall through fixed registers: eax, r11. Add support to allow loads to be folded to tail call instructions.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98465 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent d3da36286f
commit f48ef03655
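For context, a minimal sketch of the pattern this change targets (illustrative only, not a test from this commit; the function name @dispatch and the exact assembly shown in the comments are assumptions): an indirect tail call whose callee address is loaded from memory. With load folding, such a call can be selected to the new TCRETURNmi/TCRETURNmi64 pseudos and emitted as a memory-operand jmp in the epilogue, instead of first being forced through a fixed scratch register (%eax on x86-32, %r11 on x86-64).

; Illustrative sketch only (assumed example, compiled with -tailcallopt).
define fastcc i32 @dispatch(i32 (i32)** %slot, i32 %x) {
entry:
  ; The callee address comes from a load; it can now be folded into the
  ; tail-call jump, e.g. "jmpq *(%rdi)" after the epilogue, rather than
  ; being copied into %r11 first.
  %fp = load i32 (i32)** %slot
  %r = tail call fastcc i32 %fp(i32 %x)
  ret i32 %r
}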
@@ -349,17 +349,17 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
   return true;
 }
 
-/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
-/// operand and move load below the call's chain operand.
-static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
-                                  SDValue Call, SDValue CallSeqStart) {
+/// MoveBelowCallOrigChain - Replace the original chain operand of the call with
+/// load's chain operand and move load below the call's chain operand.
+static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
+                               SDValue Call, SDValue OrigChain) {
   SmallVector<SDValue, 8> Ops;
-  SDValue Chain = CallSeqStart.getOperand(0);
+  SDValue Chain = OrigChain.getOperand(0);
   if (Chain.getNode() == Load.getNode())
     Ops.push_back(Load.getOperand(0));
   else {
     assert(Chain.getOpcode() == ISD::TokenFactor &&
-           "Unexpected CallSeqStart chain operand");
+           "Unexpected chain operand");
     for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
       if (Chain.getOperand(i).getNode() == Load.getNode())
         Ops.push_back(Load.getOperand(0));
@@ -371,9 +371,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
     Ops.clear();
     Ops.push_back(NewChain);
   }
-  for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
-    Ops.push_back(CallSeqStart.getOperand(i));
-  CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
+  for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
+    Ops.push_back(OrigChain.getOperand(i));
+  CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size());
   CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
                              Load.getOperand(1), Load.getOperand(2));
   Ops.clear();
@@ -386,7 +386,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
 /// isCalleeLoad - Return true if call address is a load and it can be
 /// moved below CALLSEQ_START and the chains leading up to the call.
 /// Return the CALLSEQ_START by reference as a second output.
-static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
+/// In the case of a tail call, there isn't a callseq node between the call
+/// chain and the load.
+static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
   if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
     return false;
   LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@@ -397,12 +399,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
     return false;
 
   // Now let's find the callseq_start.
-  while (Chain.getOpcode() != ISD::CALLSEQ_START) {
+  while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
     if (!Chain.hasOneUse())
       return false;
     Chain = Chain.getOperand(0);
   }
-
+
+  if (!Chain.getNumOperands())
+    return false;
   if (Chain.getOperand(0).getNode() == Callee.getNode())
     return true;
   if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
@@ -420,7 +424,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
        E = CurDAG->allnodes_end(); I != E; ) {
     SDNode *N = I++;  // Preincrement iterator to avoid invalidation issues.
 
-    if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) {
+    if (OptLevel != CodeGenOpt::None &&
+        (N->getOpcode() == X86ISD::CALL ||
+         N->getOpcode() == X86ISD::TC_RETURN)) {
       /// Also try moving call address load from outside callseq_start to just
       /// before the call to allow it to be folded.
       ///
@@ -440,11 +446,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
       ///      \        /
       ///       \      /
       ///        [CALL]
+      bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
       SDValue Chain = N->getOperand(0);
       SDValue Load  = N->getOperand(1);
-      if (!isCalleeLoad(Load, Chain))
+      if (!isCalleeLoad(Load, Chain, HasCallSeq))
         continue;
-      MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain);
+      MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
       ++NumLoadMoved;
       continue;
     }
@@ -2133,18 +2133,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
                                          OpFlags);
   }
 
-  if (isTailCall && !WasGlobalOrExternal) {
-    // Force the address into a (call preserved) caller-saved register since
-    // tailcall must happen after callee-saved registers are poped.
-    // FIXME: Give it a special register class that contains caller-saved
-    // register instead?
-    unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX;
-    Chain = DAG.getCopyToReg(Chain, dl,
-                             DAG.getRegister(TCReg, getPointerTy()),
-                             Callee,InFlag);
-    Callee = DAG.getRegister(TCReg, getPointerTy());
-  }
-
   // Returns a chain & a flag for retval copy to use.
   SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
   SmallVector<SDValue, 8> Ops;
@@ -2190,14 +2178,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
       if (RVLocs[i].isRegLoc())
         MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
     }
-
-    assert(((Callee.getOpcode() == ISD::Register &&
-             (cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
-              cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
-            Callee.getOpcode() == ISD::TargetExternalSymbol ||
-            Callee.getOpcode() == ISD::TargetGlobalAddress) &&
-           "Expecting a global address, external symbol, or scratch register");
-
     return DAG.getNode(X86ISD::TC_RETURN, dl,
                        NodeTys, &Ops[0], Ops.size());
   }
@@ -33,6 +33,15 @@ def i64i8imm : Operand<i64> {
   let ParserMatchClass = ImmSExt8AsmOperand;
 }
 
+// Special i64mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i64mem_TC : Operand<i64> {
+  let PrintMethod = "printi64mem";
+  let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
 def lea64mem : Operand<i64> {
   let PrintMethod = "printlea64mem";
   let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
@@ -177,22 +186,31 @@ let isCall = 1 in
 
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset,
-                                           variable_ops),
-                       "#TC_RETURN $dst $offset",
-                       []>;
+  let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
+              FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [RSP] in {
+  def TCRETURNdi64 : I<0, Pseudo, (outs),
+                       (ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+  def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
+                                           variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
+  def TCRETURNmi64 : I<0, Pseudo, (outs),
+                       (ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
+                       "#TC_RETURN $dst $offset", []>;
 
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset,
-                                           variable_ops),
-                       "#TC_RETURN $dst $offset",
-                       []>;
+  def TAILJMPd64 : Ii32<0xE9, RawFrm, (outs),
+                        (ins i64i32imm_pcrel:$dst, variable_ops),
+                        "jmp\t$dst # TAILCALL", []>;
+  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst # TAILCALL", []>;
 
-
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops),
-                     "jmp{q}\t{*}$dst # TAILCALL",
-                     []>;
+  def TAILJMPm64 : I<0xff, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
+                     "jmp{q}\t{*}$dst # TAILCALL", []>;
+}
 
 // Branches
 let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@@ -340,6 +358,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
                       "mov{q}\t{$src, $dst|$dst, $src}",
                       [(store i64immSExt32:$src, addr:$dst)]>;
 
+/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
+let neverHasSideEffects = 1 in
+def MOV64rr_TC : I<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
+                   "mov{q}\t{$src, $dst|$dst, $src}", []>;
+
+let mayLoad = 1,
+    canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV64rm_TC : I<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
+                   "mov{q}\t{$src, $dst|$dst, $src}",
+                   []>;
+
+let mayStore = 1 in
+def MOV64mr_TC : I<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
+                   "mov{q}\t{$src, $dst|$dst, $src}",
+                   []>;
+
 def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
                     "mov{q}\t{$src, %rax|%rax, $src}", []>;
 def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@@ -1885,14 +1919,21 @@ def : Pat<(X86call (i64 texternalsym:$dst)),
           (WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
 
 // tailcall stuff
-def : Pat<(X86tcret GR64:$dst, imm:$off),
-          (TCRETURNri64 GR64:$dst, imm:$off)>;
+def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
+          (TCRETURNri64 GR64_TC:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi64 addr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
+          (TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
 
 def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
-          (TCRETURNdi64 texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi64 texternalsym:$dst, imm:$off)>,
+          Requires<[In64BitMode]>;
 
 // Comparisons.
 
@@ -266,6 +266,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::MOV16rr,     X86::MOV16mr, 0, 0 },
     { X86::MOV32ri,     X86::MOV32mi, 0, 0 },
     { X86::MOV32rr,     X86::MOV32mr, 0, 0 },
+    { X86::MOV32rr_TC,  X86::MOV32mr_TC, 0, 0 },
    { X86::MOV64ri32,   X86::MOV64mi32, 0, 0 },
     { X86::MOV64rr,     X86::MOV64mr, 0, 0 },
     { X86::MOV8ri,      X86::MOV8mi, 0, 0 },
@@ -301,6 +302,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::SETPr,       X86::SETPm, 0, 0 },
     { X86::SETSr,       X86::SETSm, 0, 0 },
     { X86::TAILJMPr,    X86::TAILJMPm, 1, 0 },
+    { X86::TAILJMPr64,  X86::TAILJMPm64, 1, 0 },
     { X86::TEST16ri,    X86::TEST16mi, 1, 0 },
     { X86::TEST32ri,    X86::TEST32mi, 1, 0 },
     { X86::TEST64ri32,  X86::TEST64mi32, 1, 0 },
@@ -376,6 +378,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
     { X86::Int_UCOMISSrr,   X86::Int_UCOMISSrm, 0 },
     { X86::MOV16rr,         X86::MOV16rm, 0 },
     { X86::MOV32rr,         X86::MOV32rm, 0 },
+    { X86::MOV32rr_TC,      X86::MOV32rm_TC, 0 },
     { X86::MOV64rr,         X86::MOV64rm, 0 },
     { X86::MOV64toPQIrr,    X86::MOVQI2PQIrm, 0 },
     { X86::MOV64toSDrr,     X86::MOV64toSDrm, 0 },
@@ -675,6 +678,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
   case X86::MOV16rr:
   case X86::MOV32rr:
   case X86::MOV64rr:
+  case X86::MOV32rr_TC:
+  case X86::MOV64rr_TC:
 
   // FP Stack register class copies
   case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
@@ -1901,6 +1906,10 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
       Opc = X86::MOV16rr;
     } else if (CommonRC == &X86::GR8_NOREXRegClass) {
       Opc = X86::MOV8rr;
+    } else if (CommonRC == &X86::GR64_TCRegClass) {
+      Opc = X86::MOV64rr_TC;
+    } else if (CommonRC == &X86::GR32_TCRegClass) {
+      Opc = X86::MOV32rr_TC;
     } else if (CommonRC == &X86::RFP32RegClass) {
       Opc = X86::MOV_Fp3232;
     } else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
@@ -2038,6 +2047,10 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
     Opc = X86::MOV16mr;
   } else if (RC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8mr;
+  } else if (RC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64mr_TC;
+  } else if (RC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32mr_TC;
   } else if (RC == &X86::RFP80RegClass) {
     Opc = X86::ST_FpP80m;   // pops
   } else if (RC == &X86::RFP64RegClass) {
@@ -2131,6 +2144,10 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
     Opc = X86::MOV16rm;
   } else if (RC == &X86::GR8_NOREXRegClass) {
     Opc = X86::MOV8rm;
+  } else if (RC == &X86::GR64_TCRegClass) {
+    Opc = X86::MOV64rm_TC;
+  } else if (RC == &X86::GR32_TCRegClass) {
+    Opc = X86::MOV32rm_TC;
   } else if (RC == &X86::RFP80RegClass) {
     Opc = X86::LD_Fp80m;
   } else if (RC == &X86::RFP64RegClass) {
@@ -234,6 +234,15 @@ def i8mem_NOREX : Operand<i64> {
   let ParserMatchClass = X86MemAsmOperand;
 }
 
+// Special i32mem for addresses of load folding tail calls. These are not
+// allowed to use callee-saved registers since they must be scheduled
+// after callee-saved register are popped.
+def i32mem_TC : Operand<i32> {
+  let PrintMethod = "printi32mem";
+  let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
+  let ParserMatchClass = X86MemAsmOperand;
+}
+
 def lea32mem : Operand<i32> {
   let PrintMethod = "printlea32mem";
   let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
@@ -696,30 +705,33 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
 // Tail call stuff.
 
 let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TCRETURNdi : I<0, Pseudo, (outs),
-                     (ins i32imm:$dst, i32imm:$offset, variable_ops),
-                     "#TC_RETURN $dst $offset",
-                     []>;
+  let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
+              MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
+              XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+              XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
+      Uses = [ESP] in {
+  def TCRETURNdi : I<0, Pseudo, (outs),
+                     (ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+  def TCRETURNri : I<0, Pseudo, (outs),
+                     (ins GR32_TC:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
+  def TCRETURNmi : I<0, Pseudo, (outs),
+                     (ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
+                     "#TC_RETURN $dst $offset", []>;
 
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TCRETURNri : I<0, Pseudo, (outs),
-                     (ins GR32:$dst, i32imm:$offset, variable_ops),
-                     "#TC_RETURN $dst $offset",
-                     []>;
-
-// FIXME: The should be pseudo instructions that are lowered when going to
-// mcinst.
-let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops),
+  // FIXME: The should be pseudo instructions that are lowered when going to
+  // mcinst.
+  def TAILJMPd : Ii32<0xE9, RawFrm, (outs),
+                      (ins i32imm_pcrel:$dst, variable_ops),
                  "jmp\t$dst # TAILCALL",
                  []>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops),
+  def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
                    "jmp{l}\t{*}$dst # TAILCALL",
                  []>;
-let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
-  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops),
-                   "jmp\t{*}$dst # TAILCALL", []>;
+  def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
+                   "jmp{l}\t{*}$dst # TAILCALL", []>;
+}
 
 //===----------------------------------------------------------------------===//
 //  Miscellaneous Instructions...
@@ -1032,6 +1044,22 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
                 "mov{l}\t{$src, $dst|$dst, $src}",
                 [(store GR32:$src, addr:$dst)]>;
 
+/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
+let neverHasSideEffects = 1 in
+def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
+                   "mov{l}\t{$src, $dst|$dst, $src}", []>;
+
+let mayLoad = 1,
+    canFoldAsLoad = 1, isReMaterializable = 1 in
+def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
+                   "mov{l}\t{$src, $dst|$dst, $src}",
+                   []>;
+
+let mayStore = 1 in
+def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
+                   "mov{l}\t{$src, $dst|$dst, $src}",
+                   []>;
+
 // Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
 // that they can be used for copying and storing h registers, which can't be
 // encoded when a REX prefix is present.
@@ -4294,14 +4322,21 @@ def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
 
 // Calls
 // tailcall stuff
-def : Pat<(X86tcret GR32:$dst, imm:$off),
-          (TCRETURNri GR32:$dst, imm:$off)>;
+def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
+          (TCRETURNri GR32_TC:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
+
+def : Pat<(X86tcret (load addr:$dst), imm:$off),
+          (TCRETURNmi addr:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
 
 def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
 
 def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
-          (TCRETURNdi texternalsym:$dst, imm:$off)>;
+          (TCRETURNdi texternalsym:$dst, imm:$off)>,
+          Requires<[In32BitMode]>;
 
 // Normal calls, with various flavors of addresses.
 def : Pat<(X86call (i32 tglobaladdr:$dst)),
@@ -1138,13 +1138,12 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
   case X86::RETI:
   case X86::TCRETURNdi:
   case X86::TCRETURNri:
-  case X86::TCRETURNri64:
+  case X86::TCRETURNmi:
   case X86::TCRETURNdi64:
+  case X86::TCRETURNri64:
+  case X86::TCRETURNmi64:
   case X86::EH_RETURN:
   case X86::EH_RETURN64:
   case X86::TAILJMPd:
   case X86::TAILJMPr:
   case X86::TAILJMPm:
     break;  // These are ok
   }
@@ -1229,11 +1228,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
             TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
             StackPtr).addReg(DestAddr.getReg());
   } else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
-             RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
+             RetOpcode == X86::TCRETURNmi ||
+             RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
+             RetOpcode == X86::TCRETURNmi64) {
+    bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
     // Tail call return: adjust the stack pointer and jump to callee.
     MBBI = prior(MBB.end());
     MachineOperand &JumpTarget = MBBI->getOperand(0);
-    MachineOperand &StackAdjust = MBBI->getOperand(1);
+    MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
     assert(StackAdjust.isImm() && "Expecting immediate value.");
 
     // Adjust stack pointer.
@@ -1253,10 +1255,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
     }
 
     // Jump to label or value in register.
-    if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) {
-      BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
+    if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
+      BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
+                                     ? X86::TAILJMPd : X86::TAILJMPd64)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
                          JumpTarget.getTargetFlags());
+    } else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
+      MachineInstrBuilder MIB =
+        BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
+                                       ? X86::TAILJMPm : X86::TAILJMPm64));
+      for (unsigned i = 0; i != 5; ++i)
+        MIB.addOperand(MBBI->getOperand(i));
     } else if (RetOpcode == X86::TCRETURNri64) {
       BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
     } else {
@@ -535,6 +535,13 @@ def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
 def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
   let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
 }
+def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
+  let SubRegClassList = [GR8, GR8, GR16];
+}
+def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
+                                               R8, R9, R11]> {
+  let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
+}
 
 // GR8_NOREX - GR8 registers which do not require a REX prefix.
 def GR8_NOREX : RegisterClass<"X86", [i8], 8,
@@ -20,7 +20,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
 ; CHECK: subq $8, %rsp
 ; Put the call target into R11, which won't be clobbered while restoring
 ; callee-saved registers and won't be used for passing arguments.
-; CHECK: movq %rdi, %r11
+; CHECK: movq %rdi, %rax
 ; Pass the stack argument.
 ; CHECK: movl $7, 16(%rsp)
 ; Pass the register arguments, in the right registers.
@@ -33,7 +33,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
 ; Adjust the stack to "return".
 ; CHECK: addq $8, %rsp
 ; And tail-call to the target.
-; CHECK: jmpq *%r11 # TAILCALL
+; CHECK: jmpq *%rax # TAILCALL
   %res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
                                       i32 6, i32 7)
   ret i32 %res
@@ -60,11 +60,11 @@ define fastcc i32 @direct_manyargs() {
 ; the jmp instruction. Put it into R11, which won't be clobbered
 ; while restoring callee-saved registers and won't be used for passing
 ; arguments.
-; CHECK: movabsq $manyargs_callee, %r11
+; CHECK: movabsq $manyargs_callee, %rax
 ; Adjust the stack to "return".
 ; CHECK: addq $8, %rsp
 ; And tail-call to the target.
-; CHECK: jmpq *%r11 # TAILCALL
+; CHECK: jmpq *%rax # TAILCALL
   %res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
                                                i32 5, i32 6, i32 7)
   ret i32 %res
@@ -1,4 +1,4 @@
-; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
+; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
 
 declare i32 @putchar(i32)
 