diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 06cf9af80b6..e430b07d143 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -775,6 +775,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL: return "PPCISD::BCTRL"; + case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC"; case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG"; case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE"; case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP"; @@ -3864,7 +3865,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. - bool needsTOCRestore = false; if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) { if (CallOpc == PPCISD::BCTRL) { // This is a call through a function pointer. @@ -3876,7 +3876,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. - needsTOCRestore = true; + CallOpc = PPCISD::BCTRL_LOAD_TOC; + + EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); + SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); + unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); + SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); + SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); + + // The address needs to go after the chain input but before the flag (or + // any other variadic arguments). + Ops.insert(std::next(Ops.begin()), AddTOC); } else if ((CallOpc == PPCISD::CALL) && (!isLocalCall(Callee) || DAG.getTarget().getRelocationModel() == Reloc::PIC_)) { @@ -3890,17 +3900,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl, Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops); InFlag = Chain.getValue(1); - if (needsTOCRestore) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); - EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); - SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT); - unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI); - SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset); - SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff); - Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag); - InFlag = Chain.getValue(1); - } - Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag, dl); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2564d3d9bf3..089cfe73c47 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -117,6 +117,10 @@ namespace llvm { /// BCTRL instruction. BCTRL, + /// CHAIN,FLAG = BCTRL(CHAIN, ADDR, INFLAG) - The combination of a bctrl + /// instruction and the TOC reload required on SVR4 PPC64. + BCTRL_LOAD_TOC, + /// Return with a flag operand, matched by 'blr' RET_FLAG, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 460e49e5724..bea27a34926 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -167,6 +167,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { } } } + +let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1, + Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in { + def BCTRL8_LDinto_toc : + XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs), + (ins memrix:$src), + "bctrl\n\tld 2, $src", IIC_BrB, + [(PPCbctrl_load_toc ixaddr:$src)]>, + Requires<[In64BitMode]>; +} + } // Interpretation64Bit // FIXME: Duplicating this for the asm parser should be unnecessary, but the diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index 99b266d3365..186d5dce570 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -945,6 +945,45 @@ class XLForm_3 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, let Inst{31} = 0; } +class XLForm_2_and_DSForm_1 opcode1, bits<10> xo1, bit lk, + bits<6> opcode2, bits<2> xo2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I2 { + bits<5> BO; + bits<5> BI; + bits<2> BH; + + bits<5> RST; + bits<19> DS_RA; + + let Pattern = pattern; + + let Inst{6-10} = BO; + let Inst{11-15} = BI; + let Inst{16-18} = 0; + let Inst{19-20} = BH; + let Inst{21-30} = xo1; + let Inst{31} = lk; + + let Inst{38-42} = RST; + let Inst{43-47} = DS_RA{18-14}; // Register # + let Inst{48-61} = DS_RA{13-0}; // Displacement. + let Inst{62-63} = xo2; +} + +class XLForm_2_ext_and_DSForm_1 opcode1, bits<10> xo1, + bits<5> bo, bits<5> bi, bit lk, + bits<6> opcode2, bits<2> xo2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : XLForm_2_and_DSForm_1 { + let BO = bo; + let BI = bi; + let BH = 0; +} + // 1.7.8 XFX-Form class XFXForm_1 opcode, bits<10> xo, dag OOL, dag IOL, string asmstr, InstrItinClass itin> diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index ff8592f3778..4c01b2dee0c 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -153,6 +153,10 @@ def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC", + SDTypeProfile<0, 1, []>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; diff --git a/test/CodeGen/PowerPC/ppc64-calls.ll b/test/CodeGen/PowerPC/ppc64-calls.ll index 31794be25be..707ba95235f 100644 --- a/test/CodeGen/PowerPC/ppc64-calls.ll +++ b/test/CodeGen/PowerPC/ppc64-calls.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=ppc64 | FileCheck %s +; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64" target triple = "powerpc64-unknown-linux-gnu" @@ -67,3 +67,20 @@ define double @test_external(double %x) nounwind { ; CHECK-NEXT: nop ret double %call } + +; The 'ld 2, 40(1)' really must always come directly after the bctrl to make +; the unwinding code in libgcc happy. +@g = external global void ()* +declare void @h(i64) +define void @test_indir_toc_reload(i64 %x) { + %1 = load void ()** @g + call void %1() + call void @h(i64 %x) + ret void + +; CHECK-LABEL: @test_indir_toc_reload +; CHECK: bctrl +; CHECK-NEXT: ld 2, 40(1) +; CHECK: blr +} +