Do not force indirect tail calls through fixed registers (EAX in 32-bit mode, R11 in 64-bit mode). Add support for folding loads into tail call instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@98465 91177308-0d34-0410-b5e6-96231b3b80d8
Evan Cheng 2010-03-14 03:48:46 +00:00
parent d3da36286f
commit f48ef03655
9 changed files with 185 additions and 89 deletions
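
For context (not part of this commit), here is a minimal C++ sketch of the kind of call the change targets: an indirect tail call whose target is loaded from memory. The struct and function names are made up for illustration, and the assembly in the comments assumes 64-bit codegen with tail call optimization enabled.

// Illustrative only: an indirect call in tail position whose target comes
// from memory.
struct Handler {
  int (*fn)(int);
};

int dispatch(const Handler *h, int x) {
  // Old lowering: the target was always copied into a fixed scratch
  // register (EAX in 32-bit mode, R11 in 64-bit mode) before the jump:
  //     movq (%rdi), %r11
  //     jmpq *%r11          # TAILCALL
  // With this commit the register allocator may pick any GR64_TC register,
  // or the load can be folded into the tail jump itself:
  //     jmpq *(%rdi)        # TAILCALL
  return h->fn(x);
}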

View File

@ -349,17 +349,17 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
return true;
}
/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
SDValue Call, SDValue CallSeqStart) {
/// MoveBelowOrigChain - Replace the original chain operand of the call with
/// load's chain operand and move load below the call's chain operand.
static void MoveBelowOrigChain(SelectionDAG *CurDAG, SDValue Load,
SDValue Call, SDValue OrigChain) {
SmallVector<SDValue, 8> Ops;
SDValue Chain = CallSeqStart.getOperand(0);
SDValue Chain = OrigChain.getOperand(0);
if (Chain.getNode() == Load.getNode())
Ops.push_back(Load.getOperand(0));
else {
assert(Chain.getOpcode() == ISD::TokenFactor &&
"Unexpected CallSeqStart chain operand");
"Unexpected chain operand");
for (unsigned i = 0, e = Chain.getNumOperands(); i != e; ++i)
if (Chain.getOperand(i).getNode() == Load.getNode())
Ops.push_back(Load.getOperand(0));
@ -371,9 +371,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
Ops.clear();
Ops.push_back(NewChain);
}
for (unsigned i = 1, e = CallSeqStart.getNumOperands(); i != e; ++i)
Ops.push_back(CallSeqStart.getOperand(i));
CurDAG->UpdateNodeOperands(CallSeqStart, &Ops[0], Ops.size());
for (unsigned i = 1, e = OrigChain.getNumOperands(); i != e; ++i)
Ops.push_back(OrigChain.getOperand(i));
CurDAG->UpdateNodeOperands(OrigChain, &Ops[0], Ops.size());
CurDAG->UpdateNodeOperands(Load, Call.getOperand(0),
Load.getOperand(1), Load.getOperand(2));
Ops.clear();
@ -386,7 +386,9 @@ static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
/// isCalleeLoad - Return true if call address is a load and it can be
/// moved below CALLSEQ_START and the chains leading up to the call.
/// Return the CALLSEQ_START by reference as a second output.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
/// In the case of a tail call, there isn't a callseq node between the call
/// chain and the load.
static bool isCalleeLoad(SDValue Callee, SDValue &Chain, bool HasCallSeq) {
if (Callee.getNode() == Chain.getNode() || !Callee.hasOneUse())
return false;
LoadSDNode *LD = dyn_cast<LoadSDNode>(Callee.getNode());
@ -397,12 +399,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
return false;
// Now let's find the callseq_start.
while (Chain.getOpcode() != ISD::CALLSEQ_START) {
while (HasCallSeq && Chain.getOpcode() != ISD::CALLSEQ_START) {
if (!Chain.hasOneUse())
return false;
Chain = Chain.getOperand(0);
}
if (!Chain.getNumOperands())
return false;
if (Chain.getOperand(0).getNode() == Callee.getNode())
return true;
if (Chain.getOperand(0).getOpcode() == ISD::TokenFactor &&
@ -420,7 +424,9 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
E = CurDAG->allnodes_end(); I != E; ) {
SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) {
if (OptLevel != CodeGenOpt::None &&
(N->getOpcode() == X86ISD::CALL ||
N->getOpcode() == X86ISD::TC_RETURN)) {
/// Also try moving call address load from outside callseq_start to just
/// before the call to allow it to be folded.
///
@ -440,11 +446,12 @@ void X86DAGToDAGISel::PreprocessISelDAG() {
/// \ /
/// \ /
/// [CALL]
bool HasCallSeq = N->getOpcode() == X86ISD::CALL;
SDValue Chain = N->getOperand(0);
SDValue Load = N->getOperand(1);
if (!isCalleeLoad(Load, Chain))
if (!isCalleeLoad(Load, Chain, HasCallSeq))
continue;
MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain);
MoveBelowOrigChain(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}

View File

@ -2133,18 +2133,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
OpFlags);
}
if (isTailCall && !WasGlobalOrExternal) {
// Force the address into a caller-saved register, since the tail call
// must happen after callee-saved registers are popped.
// FIXME: Give it a special register class that contains only caller-saved
// registers instead?
unsigned TCReg = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
DAG.getRegister(TCReg, getPointerTy()),
Callee,InFlag);
Callee = DAG.getRegister(TCReg, getPointerTy());
}
// Returns a chain & a flag for retval copy to use.
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
SmallVector<SDValue, 8> Ops;
@ -2190,14 +2178,6 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
if (RVLocs[i].isRegLoc())
MF.getRegInfo().addLiveOut(RVLocs[i].getLocReg());
}
assert(((Callee.getOpcode() == ISD::Register &&
(cast<RegisterSDNode>(Callee)->getReg() == X86::EAX ||
cast<RegisterSDNode>(Callee)->getReg() == X86::R11)) ||
Callee.getOpcode() == ISD::TargetExternalSymbol ||
Callee.getOpcode() == ISD::TargetGlobalAddress) &&
"Expecting a global address, external symbol, or scratch register");
return DAG.getNode(X86ISD::TC_RETURN, dl,
NodeTys, &Ops[0], Ops.size());
}

View File

@ -33,6 +33,15 @@ def i64i8imm : Operand<i64> {
let ParserMatchClass = ImmSExt8AsmOperand;
}
// Special i64mem for addresses of load-folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after the callee-saved registers are popped.
def i64mem_TC : Operand<i64> {
let PrintMethod = "printi64mem";
let MIOperandInfo = (ops GR64_TC, i8imm, GR64_TC, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
}
def lea64mem : Operand<i64> {
let PrintMethod = "printlea64mem";
let MIOperandInfo = (ops GR64, i8imm, GR64_NOSP, i32imm);
@ -177,22 +186,31 @@ let isCall = 1 in
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNdi64 : I<0, Pseudo, (outs), (ins i64imm:$dst, i32imm:$offset,
variable_ops),
"#TC_RETURN $dst $offset",
[]>;
let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0, ST1,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in {
def TCRETURNdi64 : I<0, Pseudo, (outs),
(ins i64i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64_TC:$dst, i32imm:$offset,
variable_ops),
"#TC_RETURN $dst $offset", []>;
def TCRETURNmi64 : I<0, Pseudo, (outs),
(ins i64mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri64 : I<0, Pseudo, (outs), (ins GR64:$dst, i32imm:$offset,
variable_ops),
"#TC_RETURN $dst $offset",
[]>;
def TAILJMPd64 : Ii32<0xE9, RawFrm, (outs),
(ins i64i32imm_pcrel:$dst, variable_ops),
"jmp\t$dst # TAILCALL", []>;
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr64 : I<0xFF, MRM4r, (outs), (ins GR64:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL",
[]>;
def TAILJMPm64 : I<0xff, MRM4m, (outs), (ins i64mem_TC:$dst, variable_ops),
"jmp{q}\t{*}$dst # TAILCALL", []>;
}
// Branches
let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
@ -340,6 +358,22 @@ def MOV64mi32 : RIi32<0xC7, MRM0m, (outs), (ins i64mem:$dst, i64i32imm:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(store i64immSExt32:$src, addr:$dst)]>;
/// Versions of MOV64rr, MOV64rm, and MOV64mr for i64mem_TC and GR64_TC.
let neverHasSideEffects = 1 in
def MOV64rr_TC : I<0x89, MRMDestReg, (outs GR64_TC:$dst), (ins GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV64rm_TC : I<0x8B, MRMSrcMem, (outs GR64_TC:$dst), (ins i64mem_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[]>;
let mayStore = 1 in
def MOV64mr_TC : I<0x89, MRMDestMem, (outs), (ins i64mem_TC:$dst, GR64_TC:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[]>;
def MOV64o8a : RIi8<0xA0, RawFrm, (outs), (ins offset8:$src),
"mov{q}\t{$src, %rax|%rax, $src}", []>;
def MOV64o64a : RIi32<0xA1, RawFrm, (outs), (ins offset64:$src),
@ -1885,14 +1919,21 @@ def : Pat<(X86call (i64 texternalsym:$dst)),
(WINCALL64pcrel32 texternalsym:$dst)>, Requires<[IsWin64]>;
// tailcall stuff
def : Pat<(X86tcret GR64:$dst, imm:$off),
(TCRETURNri64 GR64:$dst, imm:$off)>;
def : Pat<(X86tcret GR64_TC:$dst, imm:$off),
(TCRETURNri64 GR64_TC:$dst, imm:$off)>,
Requires<[In64BitMode]>;
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi64 addr:$dst, imm:$off)>,
Requires<[In64BitMode]>;
def : Pat<(X86tcret (i64 tglobaladdr:$dst), imm:$off),
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>;
(TCRETURNdi64 tglobaladdr:$dst, imm:$off)>,
Requires<[In64BitMode]>;
def : Pat<(X86tcret (i64 texternalsym:$dst), imm:$off),
(TCRETURNdi64 texternalsym:$dst, imm:$off)>;
(TCRETURNdi64 texternalsym:$dst, imm:$off)>,
Requires<[In64BitMode]>;
// Comparisons.

View File

@ -266,6 +266,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOV16rr, X86::MOV16mr, 0, 0 },
{ X86::MOV32ri, X86::MOV32mi, 0, 0 },
{ X86::MOV32rr, X86::MOV32mr, 0, 0 },
{ X86::MOV32rr_TC, X86::MOV32mr_TC, 0, 0 },
{ X86::MOV64ri32, X86::MOV64mi32, 0, 0 },
{ X86::MOV64rr, X86::MOV64mr, 0, 0 },
{ X86::MOV8ri, X86::MOV8mi, 0, 0 },
@ -301,6 +302,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::SETPr, X86::SETPm, 0, 0 },
{ X86::SETSr, X86::SETSm, 0, 0 },
{ X86::TAILJMPr, X86::TAILJMPm, 1, 0 },
{ X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 },
{ X86::TEST16ri, X86::TEST16mi, 1, 0 },
{ X86::TEST32ri, X86::TEST32mi, 1, 0 },
{ X86::TEST64ri32, X86::TEST64mi32, 1, 0 },
@ -376,6 +378,7 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::Int_UCOMISSrr, X86::Int_UCOMISSrm, 0 },
{ X86::MOV16rr, X86::MOV16rm, 0 },
{ X86::MOV32rr, X86::MOV32rm, 0 },
{ X86::MOV32rr_TC, X86::MOV32rm_TC, 0 },
{ X86::MOV64rr, X86::MOV64rm, 0 },
{ X86::MOV64toPQIrr, X86::MOVQI2PQIrm, 0 },
{ X86::MOV64toSDrr, X86::MOV64toSDrm, 0 },
@ -675,6 +678,8 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
case X86::MOV16rr:
case X86::MOV32rr:
case X86::MOV64rr:
case X86::MOV32rr_TC:
case X86::MOV64rr_TC:
// FP Stack register class copies
case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
@ -1901,6 +1906,10 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
Opc = X86::MOV16rr;
} else if (CommonRC == &X86::GR8_NOREXRegClass) {
Opc = X86::MOV8rr;
} else if (CommonRC == &X86::GR64_TCRegClass) {
Opc = X86::MOV64rr_TC;
} else if (CommonRC == &X86::GR32_TCRegClass) {
Opc = X86::MOV32rr_TC;
} else if (CommonRC == &X86::RFP32RegClass) {
Opc = X86::MOV_Fp3232;
} else if (CommonRC == &X86::RFP64RegClass || CommonRC == &X86::RSTRegClass) {
@ -2038,6 +2047,10 @@ static unsigned getStoreRegOpcode(unsigned SrcReg,
Opc = X86::MOV16mr;
} else if (RC == &X86::GR8_NOREXRegClass) {
Opc = X86::MOV8mr;
} else if (RC == &X86::GR64_TCRegClass) {
Opc = X86::MOV64mr_TC;
} else if (RC == &X86::GR32_TCRegClass) {
Opc = X86::MOV32mr_TC;
} else if (RC == &X86::RFP80RegClass) {
Opc = X86::ST_FpP80m; // pops
} else if (RC == &X86::RFP64RegClass) {
@ -2131,6 +2144,10 @@ static unsigned getLoadRegOpcode(unsigned DestReg,
Opc = X86::MOV16rm;
} else if (RC == &X86::GR8_NOREXRegClass) {
Opc = X86::MOV8rm;
} else if (RC == &X86::GR64_TCRegClass) {
Opc = X86::MOV64rm_TC;
} else if (RC == &X86::GR32_TCRegClass) {
Opc = X86::MOV32rm_TC;
} else if (RC == &X86::RFP80RegClass) {
Opc = X86::LD_Fp80m;
} else if (RC == &X86::RFP64RegClass) {
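
The { X86::TAILJMPr, X86::TAILJMPm, 1, 0 } and { X86::TAILJMPr64, X86::TAILJMPm64, 1, 0 } rows above are what let the generic load-folding machinery rewrite a register-form tail jump into its memory form when the jump target is fed by a foldable load. Below is a much-simplified, hypothetical sketch of how such a register-to-memory opcode table is consulted; it is not the actual LLVM API, and the real tables carry additional flags (for example alignment requirements).

#include <unordered_map>

// Hypothetical, simplified model of a register->memory folding table entry.
struct FoldEntry {
  unsigned MemOpcode;   // e.g. TAILJMPm64 for TAILJMPr64
  bool     FoldsLoad;   // true: the folded memory operand is a load
};

// Return the memory-form opcode for RegOpcode, or 0 if it cannot be folded.
unsigned getFoldedOpcode(const std::unordered_map<unsigned, FoldEntry> &Table,
                         unsigned RegOpcode) {
  auto It = Table.find(RegOpcode);
  return It == Table.end() ? 0 : It->second.MemOpcode;
}

With an entry like the TAILJMPr64 row above registered, a lookup on the register-form tail jump yields TAILJMPm64, and the load feeding the jump target disappears into the jump's memory operand.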

View File

@ -234,6 +234,15 @@ def i8mem_NOREX : Operand<i64> {
let ParserMatchClass = X86MemAsmOperand;
}
// Special i32mem for addresses of load-folding tail calls. These are not
// allowed to use callee-saved registers since they must be scheduled
// after the callee-saved registers are popped.
def i32mem_TC : Operand<i32> {
let PrintMethod = "printi32mem";
let MIOperandInfo = (ops GR32_TC, i8imm, GR32_TC, i32imm, i8imm);
let ParserMatchClass = X86MemAsmOperand;
}
def lea32mem : Operand<i32> {
let PrintMethod = "printlea32mem";
let MIOperandInfo = (ops GR32, i8imm, GR32_NOSP, i32imm);
@ -696,30 +705,33 @@ def ENTER : I<0xC8, RawFrm, (outs), (ins i16imm:$len, i8imm:$lvl),
// Tail call stuff.
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNdi : I<0, Pseudo, (outs),
(ins i32imm:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset",
[]>;
let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in {
def TCRETURNdi : I<0, Pseudo, (outs),
(ins i32imm_pcrel:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
def TCRETURNri : I<0, Pseudo, (outs),
(ins GR32_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
def TCRETURNmi : I<0, Pseudo, (outs),
(ins i32mem_TC:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset", []>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TCRETURNri : I<0, Pseudo, (outs),
(ins GR32:$dst, i32imm:$offset, variable_ops),
"#TC_RETURN $dst $offset",
[]>;
// FIXME: These should be pseudo instructions that are lowered when going to
// mcinst.
let isCall = 1, isBranch = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPd : Ii32<0xE9, RawFrm, (outs),(ins i32imm_pcrel:$dst,variable_ops),
// FIXME: These should be pseudo instructions that are lowered when going to
// mcinst.
def TAILJMPd : Ii32<0xE9, RawFrm, (outs),
(ins i32imm_pcrel:$dst, variable_ops),
"jmp\t$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32:$dst, variable_ops),
def TAILJMPr : I<0xFF, MRM4r, (outs), (ins GR32_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL",
[]>;
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1 in
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem:$dst, variable_ops),
"jmp\t{*}$dst # TAILCALL", []>;
def TAILJMPm : I<0xFF, MRM4m, (outs), (ins i32mem_TC:$dst, variable_ops),
"jmp{l}\t{*}$dst # TAILCALL", []>;
}
//===----------------------------------------------------------------------===//
// Miscellaneous Instructions...
@ -1032,6 +1044,22 @@ def MOV32mr : I<0x89, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[(store GR32:$src, addr:$dst)]>;
/// Versions of MOV32rr, MOV32rm, and MOV32mr for i32mem_TC and GR32_TC.
let neverHasSideEffects = 1 in
def MOV32rr_TC : I<0x89, MRMDestReg, (outs GR32_TC:$dst), (ins GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
let mayLoad = 1,
canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV32rm_TC : I<0x8B, MRMSrcMem, (outs GR32_TC:$dst), (ins i32mem_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[]>;
let mayStore = 1 in
def MOV32mr_TC : I<0x89, MRMDestMem, (outs), (ins i32mem_TC:$dst, GR32_TC:$src),
"mov{l}\t{$src, $dst|$dst, $src}",
[]>;
// Versions of MOV8rr, MOV8mr, and MOV8rm that use i8mem_NOREX and GR8_NOREX so
// that they can be used for copying and storing h registers, which can't be
// encoded when a REX prefix is present.
@ -4294,14 +4322,21 @@ def : Pat<(store (i32 (X86Wrapper tblockaddress:$src)), addr:$dst),
// Calls
// tailcall stuff
def : Pat<(X86tcret GR32:$dst, imm:$off),
(TCRETURNri GR32:$dst, imm:$off)>;
def : Pat<(X86tcret GR32_TC:$dst, imm:$off),
(TCRETURNri GR32_TC:$dst, imm:$off)>,
Requires<[In32BitMode]>;
def : Pat<(X86tcret (load addr:$dst), imm:$off),
(TCRETURNmi addr:$dst, imm:$off)>,
Requires<[In32BitMode]>;
def : Pat<(X86tcret (i32 tglobaladdr:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>;
(TCRETURNdi texternalsym:$dst, imm:$off)>,
Requires<[In32BitMode]>;
def : Pat<(X86tcret (i32 texternalsym:$dst), imm:$off),
(TCRETURNdi texternalsym:$dst, imm:$off)>;
(TCRETURNdi texternalsym:$dst, imm:$off)>,
Requires<[In32BitMode]>;
// Normal calls, with various flavors of addresses.
def : Pat<(X86call (i32 tglobaladdr:$dst)),

View File

@ -1138,13 +1138,12 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
case X86::RETI:
case X86::TCRETURNdi:
case X86::TCRETURNri:
case X86::TCRETURNri64:
case X86::TCRETURNmi:
case X86::TCRETURNdi64:
case X86::TCRETURNri64:
case X86::TCRETURNmi64:
case X86::EH_RETURN:
case X86::EH_RETURN64:
case X86::TAILJMPd:
case X86::TAILJMPr:
case X86::TAILJMPm:
break; // These are ok
}
@ -1229,11 +1228,14 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
TII.get(Is64Bit ? X86::MOV64rr : X86::MOV32rr),
StackPtr).addReg(DestAddr.getReg());
} else if (RetOpcode == X86::TCRETURNri || RetOpcode == X86::TCRETURNdi ||
RetOpcode== X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64) {
RetOpcode == X86::TCRETURNmi ||
RetOpcode == X86::TCRETURNri64 || RetOpcode == X86::TCRETURNdi64 ||
RetOpcode == X86::TCRETURNmi64) {
bool isMem = RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64;
// Tail call return: adjust the stack pointer and jump to callee.
MBBI = prior(MBB.end());
MachineOperand &JumpTarget = MBBI->getOperand(0);
MachineOperand &StackAdjust = MBBI->getOperand(1);
MachineOperand &StackAdjust = MBBI->getOperand(isMem ? 5 : 1);
assert(StackAdjust.isImm() && "Expecting immediate value.");
// Adjust stack pointer.
@ -1253,10 +1255,17 @@ void X86RegisterInfo::emitEpilogue(MachineFunction &MF,
}
// Jump to label or value in register.
if (RetOpcode == X86::TCRETURNdi|| RetOpcode == X86::TCRETURNdi64) {
BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPd)).
if (RetOpcode == X86::TCRETURNdi || RetOpcode == X86::TCRETURNdi64) {
BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNdi)
? X86::TAILJMPd : X86::TAILJMPd64)).
addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
JumpTarget.getTargetFlags());
} else if (RetOpcode == X86::TCRETURNmi || RetOpcode == X86::TCRETURNmi64) {
MachineInstrBuilder MIB =
BuildMI(MBB, MBBI, DL, TII.get((RetOpcode == X86::TCRETURNmi)
? X86::TAILJMPm : X86::TAILJMPm64));
for (unsigned i = 0; i != 5; ++i)
MIB.addOperand(MBBI->getOperand(i));
} else if (RetOpcode == X86::TCRETURNri64) {
BuildMI(MBB, MBBI, DL, TII.get(X86::TAILJMPr64), JumpTarget.getReg());
} else {
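
The isMem ? 5 : 1 operand index and the loop copying operands 0 through 4 above both follow from how LLVM encodes an x86 memory reference on a MachineInstr: five consecutive operands, which is also what the MIOperandInfo lists of i32mem_TC and i64mem_TC describe. A small illustrative struct (the field names are mine, not LLVM's) showing that layout:

// Sketch of the five machine operands that make up one x86 memory reference.
// For TCRETURNmi/TCRETURNmi64 these are operands 0..4, so the stack-adjust
// immediate lands at operand index 5.
struct X86MemRef {
  unsigned BaseReg;   // base register, e.g. RAX
  unsigned Scale;     // 1, 2, 4, or 8
  unsigned IndexReg;  // index register, or 0 for none
  int      Disp;      // signed displacement
  unsigned Segment;   // segment override, usually none
};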

View File

@ -535,6 +535,13 @@ def GR32_ABCD : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX, EBX]> {
def GR64_ABCD : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RBX]> {
let SubRegClassList = [GR8_ABCD_L, GR8_ABCD_H, GR16_ABCD, GR32_ABCD];
}
def GR32_TC : RegisterClass<"X86", [i32], 32, [EAX, ECX, EDX]> {
let SubRegClassList = [GR8, GR8, GR16];
}
def GR64_TC : RegisterClass<"X86", [i64], 64, [RAX, RCX, RDX, RSI, RDI,
R8, R9, R11]> {
let SubRegClassList = [GR8, GR8, GR16, GR32_TC];
}
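
These classes exist so that the tail-call target (and the address of a folded load) can only live in caller-saved registers, which are still valid after the epilogue pops the callee-saved registers. A small standalone check of that rationale, not part of the commit; the register lists assume the x86-64 SysV calling convention.

#include <array>
#include <string_view>

// x86-64 SysV callee-saved GPRs; everything not listed here is caller-saved.
constexpr std::array<std::string_view, 6> kCalleeSaved = {
    "RBX", "RBP", "R12", "R13", "R14", "R15"};

// Members of GR64_TC from the definition above.
constexpr std::array<std::string_view, 8> kGR64TC = {
    "RAX", "RCX", "RDX", "RSI", "RDI", "R8", "R9", "R11"};

// No GR64_TC register is callee-saved, so none of them is overwritten when
// the epilogue restores callee-saved registers before the tail jump.
constexpr bool disjoint() {
  for (std::string_view R : kGR64TC)
    for (std::string_view S : kCalleeSaved)
      if (R == S)
        return false;
  return true;
}
static_assert(disjoint(), "GR64_TC must contain only caller-saved registers");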
// GR8_NOREX - GR8 registers which do not require a REX prefix.
def GR8_NOREX : RegisterClass<"X86", [i8], 8,

View File

@ -20,7 +20,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
; CHECK: subq $8, %rsp
; Put the call target into a register that won't be clobbered while restoring
; callee-saved registers and won't be used for passing arguments.
; CHECK: movq %rdi, %r11
; CHECK: movq %rdi, %rax
; Pass the stack argument.
; CHECK: movl $7, 16(%rsp)
; Pass the register arguments, in the right registers.
@ -33,7 +33,7 @@ define fastcc i32 @indirect_manyargs(i32(i32,i32,i32,i32,i32,i32,i32)* %target)
; Adjust the stack to "return".
; CHECK: addq $8, %rsp
; And tail-call to the target.
; CHECK: jmpq *%r11 # TAILCALL
; CHECK: jmpq *%rax # TAILCALL
%res = tail call fastcc i32 %target(i32 1, i32 2, i32 3, i32 4, i32 5,
i32 6, i32 7)
ret i32 %res
@ -60,11 +60,11 @@ define fastcc i32 @direct_manyargs() {
; the jmp instruction. Put it into a register that won't be clobbered
; while restoring callee-saved registers and won't be used for passing
; arguments.
; CHECK: movabsq $manyargs_callee, %r11
; CHECK: movabsq $manyargs_callee, %rax
; Adjust the stack to "return".
; CHECK: addq $8, %rsp
; And tail-call to the target.
; CHECK: jmpq *%r11 # TAILCALL
; CHECK: jmpq *%rax # TAILCALL
%res = tail call fastcc i32 @manyargs_callee(i32 1, i32 2, i32 3, i32 4,
i32 5, i32 6, i32 7)
ret i32 %res

View File

@ -1,4 +1,4 @@
; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%eax}
; RUN: llc < %s -march=x86 -tailcallopt | grep {jmp} | grep {\\*%edx}
declare i32 @putchar(i32)