From 03f517b7993e57a55c3a07f9ec54d13e564d22ef Mon Sep 17 00:00:00 2001 From: Tilmann Scheller Date: Sat, 15 Aug 2009 11:54:46 +0000 Subject: [PATCH] Add support for the PowerPC 64-bit SVR4 ABI. The Link Register is volatile when using the 32-bit SVR4 ABI. Make it possible to use the 64-bit SVR4 ABI. Add non-volatile registers for the 64-bit SVR4 ABI. Make sure r2 is a reserved register when using the 64-bit SVR4 ABI. Update PPCFrameInfo for the 64-bit SVR4 ABI. Add FIXME for 64-bit Darwin PPC. Insert NOP instruction after direct function calls. Emit official procedure descriptors. Create TOC entries for GlobalAddress references. Spill 64-bit non-volatile registers to the correct slots. Only custom lower VAARG when using the 32-bit SVR4 ABI. Use simple VASTART lowering for the 64-bit SVR4 ABI. llvm-svn: 79091 --- .../PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 60 +++++++- lib/Target/PowerPC/PPCFrameInfo.h | 143 +++++++++++++++--- lib/Target/PowerPC/PPCISelLowering.cpp | 75 +++++---- lib/Target/PowerPC/PPCISelLowering.h | 5 + lib/Target/PowerPC/PPCInstr64Bit.td | 6 + lib/Target/PowerPC/PPCInstrInfo.td | 8 + lib/Target/PowerPC/PPCRegisterInfo.cpp | 121 +++++++++++++-- lib/Target/PowerPC/PPCRegisterInfo.td | 10 +- lib/Target/PowerPC/PPCSubtarget.h | 5 +- 9 files changed, 361 insertions(+), 72 deletions(-) diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp index efbf7492480..efb9e5c62cd 100644 --- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp +++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp @@ -81,13 +81,14 @@ namespace { }; StringMap FnStubs; - StringMap GVStubs, HiddenGVStubs; + StringMap GVStubs, HiddenGVStubs, TOC; const PPCSubtarget &Subtarget; + uint64_t LabelID; public: explicit PPCAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM, const TargetAsmInfo *T, bool V) : AsmPrinter(O, TM, T, V), - Subtarget(TM.getSubtarget()) {} + Subtarget(TM.getSubtarget()), LabelID(0) {} virtual const char *getPassName() const { return "PowerPC Assembly Printer"; @@ -310,6 +311,28 @@ namespace { printOperand(MI, OpNo+1); } + void printTOCEntryLabel(const MachineInstr *MI, unsigned OpNo) { + const MachineOperand &MO = MI->getOperand(OpNo); + + assert(MO.getType() == MachineOperand::MO_GlobalAddress); + + GlobalValue *GV = MO.getGlobal(); + + std::string Name = Mang->getMangledName(GV); + + // Map symbol -> label of TOC entry. + if (TOC.count(Name) == 0) { + std::string Label; + Label += TAI->getPrivateGlobalPrefix(); + Label += "C"; + Label += utostr(LabelID++); + + TOC[Name] = Label; + } + + O << TOC[Name] << "@toc"; + } + void printPredicateOperand(const MachineInstr *MI, unsigned OpNo, const char *Modifier); @@ -330,6 +353,7 @@ namespace { } bool runOnMachineFunction(MachineFunction &F); + bool doFinalization(Module &M); void getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); @@ -612,7 +636,19 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) { printVisibility(CurrentFnName, F->getVisibility()); EmitAlignment(MF.getAlignment(), F); - O << CurrentFnName << ":\n"; + + if (Subtarget.isPPC64()) { + // Emit an official procedure descriptor. + // FIXME 64-bit SVR4: Use MCSection here? + O << "\t.section\t\".opd\",\"aw\"\n"; + O << "\t.align 3\n"; + O << CurrentFnName << ":\n"; + O << "\t.quad .L." << CurrentFnName << ",.TOC.@tocbase\n"; + O << "\t.previous\n"; + O << ".L." << CurrentFnName << ":\n"; + } else { + O << CurrentFnName << ":\n"; + } // Emit pre-function debug information. DW->BeginFunction(&MF); @@ -731,6 +767,24 @@ void PPCLinuxAsmPrinter::PrintGlobalVariable(const GlobalVariable *GVar) { O << '\n'; } +bool PPCLinuxAsmPrinter::doFinalization(Module &M) { + const TargetData *TD = TM.getTargetData(); + + bool isPPC64 = TD->getPointerSizeInBits() == 64; + + if (isPPC64 && !TOC.empty()) { + // FIXME 64-bit SVR4: Use MCSection here? + O << "\t.section\t\".toc\",\"aw\"\n"; + + for (StringMap::iterator I = TOC.begin(), E = TOC.end(); + I != E; ++I) { + O << I->second << ":\n"; + O << "\t.tc " << I->getKeyData() << "[TC]," << I->getKeyData() << '\n'; + } + } + + return AsmPrinter::doFinalization(M); +} /// runOnMachineFunction - This uses the printMachineInstruction() /// method to print assembly for each instruction. diff --git a/lib/Target/PowerPC/PPCFrameInfo.h b/lib/Target/PowerPC/PPCFrameInfo.h index 770a560ccf4..c5636375bf0 100644 --- a/lib/Target/PowerPC/PPCFrameInfo.h +++ b/lib/Target/PowerPC/PPCFrameInfo.h @@ -31,33 +31,32 @@ public: /// getReturnSaveOffset - Return the previous frame offset to save the /// return address. - static unsigned getReturnSaveOffset(bool LP64, bool isDarwinABI) { + static unsigned getReturnSaveOffset(bool isPPC64, bool isDarwinABI) { if (isDarwinABI) - return LP64 ? 16 : 8; + return isPPC64 ? 16 : 8; // SVR4 ABI: - return 4; + return isPPC64 ? 16 : 4; } /// getFramePointerSaveOffset - Return the previous frame offset to save the /// frame pointer. - static unsigned getFramePointerSaveOffset(bool LP64, bool isDarwinABI) { + static unsigned getFramePointerSaveOffset(bool isPPC64, bool isDarwinABI) { // For the Darwin ABI: // Use the TOC save slot in the PowerPC linkage area for saving the frame // pointer (if needed.) LLVM does not generate code that uses the TOC (R2 // is treated as a caller saved register.) if (isDarwinABI) - return LP64 ? 40 : 20; + return isPPC64 ? 40 : 20; - // SVR4 ABI: - // Save it right before the link register + // SVR4 ABI: First slot in the general register save area. return -4U; } /// getLinkageSize - Return the size of the PowerPC ABI linkage area. /// - static unsigned getLinkageSize(bool LP64, bool isDarwinABI) { - if (isDarwinABI) - return 6 * (LP64 ? 8 : 4); + static unsigned getLinkageSize(bool isPPC64, bool isDarwinABI) { + if (isDarwinABI || isPPC64) + return 6 * (isPPC64 ? 8 : 4); // SVR4 ABI: return 8; @@ -65,27 +64,27 @@ public: /// getMinCallArgumentsSize - Return the size of the minium PowerPC ABI /// argument area. - static unsigned getMinCallArgumentsSize(bool LP64, bool isDarwinABI) { - // For the Darwin ABI: + static unsigned getMinCallArgumentsSize(bool isPPC64, bool isDarwinABI) { + // For the Darwin ABI / 64-bit SVR4 ABI: // The prolog code of the callee may store up to 8 GPR argument registers to // the stack, allowing va_start to index over them in memory if its varargs. // Because we cannot tell if this is needed on the caller side, we have to // conservatively assume that it is needed. As such, make sure we have at // least enough stack space for the caller to store the 8 GPRs. - if (isDarwinABI) - return 8 * (LP64 ? 8 : 4); + if (isDarwinABI || isPPC64) + return 8 * (isPPC64 ? 8 : 4); - // SVR4 ABI: + // 32-bit SVR4 ABI: // There is no default stack allocated for the 8 first GPR arguments. return 0; } /// getMinCallFrameSize - Return the minimum size a call frame can be using /// the PowerPC ABI. - static unsigned getMinCallFrameSize(bool LP64, bool isDarwinABI) { + static unsigned getMinCallFrameSize(bool isPPC64, bool isDarwinABI) { // The call frame needs to be at least big enough for linkage and 8 args. - return getLinkageSize(LP64, isDarwinABI) + - getMinCallArgumentsSize(LP64, isDarwinABI); + return getLinkageSize(isPPC64, isDarwinABI) + + getMinCallArgumentsSize(isPPC64, isDarwinABI); } // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. @@ -174,9 +173,113 @@ public: std::pair(PPC::V20, -192) }; - NumEntries = array_lengthof(Offsets); + static const std::pair Offsets64[] = { + // Floating-point register save area offsets. + std::pair(PPC::F31, -8), + std::pair(PPC::F30, -16), + std::pair(PPC::F29, -24), + std::pair(PPC::F28, -32), + std::pair(PPC::F27, -40), + std::pair(PPC::F26, -48), + std::pair(PPC::F25, -56), + std::pair(PPC::F24, -64), + std::pair(PPC::F23, -72), + std::pair(PPC::F22, -80), + std::pair(PPC::F21, -88), + std::pair(PPC::F20, -96), + std::pair(PPC::F19, -104), + std::pair(PPC::F18, -112), + std::pair(PPC::F17, -120), + std::pair(PPC::F16, -128), + std::pair(PPC::F15, -136), + std::pair(PPC::F14, -144), + + // General register save area offsets. + // FIXME 64-bit SVR4: Are 32-bit registers actually allocated in 64-bit + // mode? + std::pair(PPC::R31, -4), + std::pair(PPC::R30, -12), + std::pair(PPC::R29, -20), + std::pair(PPC::R28, -28), + std::pair(PPC::R27, -36), + std::pair(PPC::R26, -44), + std::pair(PPC::R25, -52), + std::pair(PPC::R24, -60), + std::pair(PPC::R23, -68), + std::pair(PPC::R22, -76), + std::pair(PPC::R21, -84), + std::pair(PPC::R20, -92), + std::pair(PPC::R19, -100), + std::pair(PPC::R18, -108), + std::pair(PPC::R17, -116), + std::pair(PPC::R16, -124), + std::pair(PPC::R15, -132), + std::pair(PPC::R14, -140), + + std::pair(PPC::X31, -8), + std::pair(PPC::X30, -16), + std::pair(PPC::X29, -24), + std::pair(PPC::X28, -32), + std::pair(PPC::X27, -40), + std::pair(PPC::X26, -48), + std::pair(PPC::X25, -56), + std::pair(PPC::X24, -64), + std::pair(PPC::X23, -72), + std::pair(PPC::X22, -80), + std::pair(PPC::X21, -88), + std::pair(PPC::X20, -96), + std::pair(PPC::X19, -104), + std::pair(PPC::X18, -112), + std::pair(PPC::X17, -120), + std::pair(PPC::X16, -128), + std::pair(PPC::X15, -136), + std::pair(PPC::X14, -144), + + // CR save area offset. + // FIXME SVR4: Disable CR save area for now. +// std::pair(PPC::CR2, -4), +// std::pair(PPC::CR3, -4), +// std::pair(PPC::CR4, -4), +// std::pair(PPC::CR2LT, -4), +// std::pair(PPC::CR2GT, -4), +// std::pair(PPC::CR2EQ, -4), +// std::pair(PPC::CR2UN, -4), +// std::pair(PPC::CR3LT, -4), +// std::pair(PPC::CR3GT, -4), +// std::pair(PPC::CR3EQ, -4), +// std::pair(PPC::CR3UN, -4), +// std::pair(PPC::CR4LT, -4), +// std::pair(PPC::CR4GT, -4), +// std::pair(PPC::CR4EQ, -4), +// std::pair(PPC::CR4UN, -4), + + // VRSAVE save area offset. + std::pair(PPC::VRSAVE, -4), + + // Vector register save area + std::pair(PPC::V31, -16), + std::pair(PPC::V30, -32), + std::pair(PPC::V29, -48), + std::pair(PPC::V28, -64), + std::pair(PPC::V27, -80), + std::pair(PPC::V26, -96), + std::pair(PPC::V25, -112), + std::pair(PPC::V24, -128), + std::pair(PPC::V23, -144), + std::pair(PPC::V22, -160), + std::pair(PPC::V21, -176), + std::pair(PPC::V20, -192) + }; - return Offsets; + if (TM.getSubtarget().isPPC64()) { + NumEntries = array_lengthof(Offsets64); + + return Offsets64; + } else { + NumEntries = array_lengthof(Offsets); + + return Offsets; + } } }; diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index 7ad81f8936b..b77a35f6294 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -212,8 +212,9 @@ PPCTargetLowering::PPCTargetLowering(PPCTargetMachine &TM) // VASTART needs to be custom lowered to use the VarArgsFrameIndex setOperationAction(ISD::VASTART , MVT::Other, Custom); - // VAARG is custom lowered with the SVR4 ABI - if (TM.getSubtarget().isSVR4ABI()) + // VAARG is custom lowered with the 32-bit SVR4 ABI. + if ( TM.getSubtarget().isSVR4ABI() + && !TM.getSubtarget().isPPC64()) setOperationAction(ISD::VAARG, MVT::Other, Custom); else setOperationAction(ISD::VAARG, MVT::Other, Expand); @@ -419,6 +420,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::VPERM: return "PPCISD::VPERM"; case PPCISD::Hi: return "PPCISD::Hi"; case PPCISD::Lo: return "PPCISD::Lo"; + case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY"; case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC"; case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg"; case PPCISD::SRL: return "PPCISD::SRL"; @@ -428,6 +430,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; + case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::BCTRL_Darwin: return "PPCISD::BCTRL_Darwin"; case PPCISD::BCTRL_SVR4: return "PPCISD::BCTRL_SVR4"; @@ -1176,6 +1179,13 @@ SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op, const TargetMachine &TM = DAG.getTarget(); + // 64-bit SVR4 ABI code is always position-independent. + // The actual address of the GlobalValue is stored in the TOC. + if (PPCSubTarget.isSVR4ABI() && PPCSubTarget.isPPC64()) { + return DAG.getNode(PPCISD::TOC_ENTRY, dl, MVT::i64, GA, + DAG.getRegister(PPC::X2, MVT::i64)); + } + SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, GA, Zero); SDValue Lo = DAG.getNode(PPCISD::Lo, dl, PtrVT, GA, Zero); @@ -1308,7 +1318,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, const PPCSubtarget &Subtarget) { DebugLoc dl = Op.getDebugLoc(); - if (Subtarget.isDarwinABI()) { + if (Subtarget.isDarwinABI() || Subtarget.isPPC64()) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); @@ -1317,7 +1327,7 @@ SDValue PPCTargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG, return DAG.getStore(Op.getOperand(0), dl, FR, Op.getOperand(1), SV, 0); } - // For the SVR4 ABI we follow the layout of the va_list struct. + // For the 32-bit SVR4 ABI we follow the layout of the va_list struct. // We suppose the given va_list is already allocated. // // typedef struct { @@ -1450,21 +1460,13 @@ static bool CC_PPC_SVR4_Custom_AlignFPArgRegs(unsigned &ValNo, EVT &ValVT, } /// GetFPR - Get the set of FP registers that should be allocated for arguments, -/// depending on which subtarget is selected. -static const unsigned *GetFPR(const PPCSubtarget &Subtarget) { - if (Subtarget.isDarwinABI()) { - static const unsigned FPR[] = { - PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 - }; - return FPR; - } - - +/// on Darwin. +static const unsigned *GetFPR() { static const unsigned FPR[] = { PPC::F1, PPC::F2, PPC::F3, PPC::F4, PPC::F5, PPC::F6, PPC::F7, - PPC::F8 + PPC::F8, PPC::F9, PPC::F10, PPC::F11, PPC::F12, PPC::F13 }; + return FPR; } @@ -1487,7 +1489,7 @@ PPCTargetLowering::LowerFormalArguments(SDValue Chain, &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { - if (PPCSubTarget.isSVR4ABI()) { + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerFormalArguments_SVR4(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals); } else { @@ -1505,7 +1507,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4( DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { - // SVR4 ABI Stack Frame Layout: + // 32-bit SVR4 ABI Stack Frame Layout: // +-----------------------------------+ // +--> | Back chain | // | +-----------------------------------+ @@ -1687,8 +1689,8 @@ PPCTargetLowering::LowerFormalArguments_SVR4( FIN = DAG.getNode(ISD::ADD, dl, PtrOff.getValueType(), FIN, PtrOff); } - // FIXME SVR4: We only need to save FP argument registers if CR bit 6 is - // set. + // FIXME 32-bit SVR4: We only need to save FP argument registers if CR bit 6 + // is set. // The double arguments are stored to the VarArgsFrameIndex // on the stack. @@ -1731,7 +1733,6 @@ PPCTargetLowering::LowerFormalArguments_Darwin( &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { - // TODO: add description of PPC stack frame format, or at least some docs. // MachineFunction &MF = DAG.getMachineFunction(); @@ -1756,7 +1757,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(PPCSubTarget); + static const unsigned *FPR = GetFPR(); static const unsigned VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, @@ -1986,7 +1987,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin( GPR_idx++; } ArgOffset += 16; - GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); + GPR_idx = std::min(GPR_idx+4, Num_GPR_Regs); // FIXME correct for ppc64? } ++VR_idx; } else { @@ -2262,8 +2263,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG, Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx, PseudoSourceValue::getFixedStack(NewRetAddr), 0); - // When using the SVR4 ABI there is no need to move the FP stack slot - // as the FP is never overwritten. + // When using the 32/64-bit SVR4 ABI there is no need to move the FP stack + // slot as the FP is never overwritten. if (isDarwinABI) { int NewFPLoc = SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI); @@ -2311,8 +2312,8 @@ SDValue PPCTargetLowering::EmitTailCallLoadFPAndRetAddr(SelectionDAG & DAG, LROpOut = DAG.getLoad(VT, dl, Chain, LROpOut, NULL, 0); Chain = SDValue(LROpOut.getNode(), 1); - // When using the SVR4 ABI there is no need to load the FP stack slot - // as the FP is never overwritten. + // When using the 32/64-bit SVR4 ABI there is no need to load the FP stack + // slot as the FP is never overwritten. if (isDarwinABI) { FPOpOut = getFramePointerFrameIndex(DAG); FPOpOut = DAG.getLoad(VT, dl, Chain, FPOpOut, NULL, 0); @@ -2487,7 +2488,6 @@ PPCTargetLowering::FinishCall(unsigned CallConv, DebugLoc dl, bool isTailCall, int SPDiff, unsigned NumBytes, const SmallVectorImpl &Ins, SmallVectorImpl &InVals) { - std::vector NodeTys; SmallVector Ops; unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff, @@ -2529,6 +2529,19 @@ PPCTargetLowering::FinishCall(unsigned CallConv, DebugLoc dl, bool isTailCall, Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); InFlag = Chain.getValue(1); + // Add a NOP immediately after the branch instruction when using the 64-bit + // SVR4 ABI. At link time, if caller and callee are in a different module and + // thus have a different TOC, the call will be replaced with a call to a stub + // function which saves the current TOC, loads the TOC of the callee and + // branches to the callee. The NOP will be replaced with a load instruction + // which restores the TOC of the caller from the TOC save slot of the current + // stack frame. If caller and callee belong to the same module (and have the + // same TOC), the NOP will remain unchanged. + if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { + // Insert NOP. + InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Flag, InFlag); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); @@ -2547,7 +2560,7 @@ PPCTargetLowering::LowerCall(SDValue Chain, SDValue Callee, const SmallVectorImpl &Ins, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { - if (PPCSubTarget.isSVR4ABI()) { + if (PPCSubTarget.isSVR4ABI() && !PPCSubTarget.isPPC64()) { return LowerCall_SVR4(Chain, Callee, CallConv, isVarArg, isTailCall, Outs, Ins, dl, DAG, InVals); @@ -2567,7 +2580,7 @@ PPCTargetLowering::LowerCall_SVR4(SDValue Chain, SDValue Callee, DebugLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) { // See PPCTargetLowering::LowerFormalArguments_SVR4() for a description - // of the SVR4 ABI stack frame layout. + // of the 32-bit SVR4 ABI stack frame layout. assert((!isTailCall || (CallConv == CallingConv::Fast && PerformTailCallOpt)) && @@ -2846,7 +2859,7 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee, PPC::X3, PPC::X4, PPC::X5, PPC::X6, PPC::X7, PPC::X8, PPC::X9, PPC::X10, }; - static const unsigned *FPR = GetFPR(PPCSubTarget); + static const unsigned *FPR = GetFPR(); static const unsigned VR[] = { PPC::V2, PPC::V3, PPC::V4, PPC::V5, PPC::V6, PPC::V7, PPC::V8, diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 7830e0f08d8..19fef4da0b4 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -60,6 +60,8 @@ namespace llvm { /// though these are usually folded into other nodes. Hi, Lo, + TOC_ENTRY, + /// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX) /// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to /// compute an allocation on the stack. @@ -84,6 +86,9 @@ namespace llvm { /// CALL - A direct function call. CALL_Darwin, CALL_SVR4, + /// NOP - Special NOP which follows 64-bit SVR4 calls. + NOP, + /// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a /// MTCTR instruction. MTCTR, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 3823e537f11..85524cef01c 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -123,6 +123,8 @@ def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCnop), + (NOP)>; // Atomic operations let usesCustomDAGSchedInserter = 1 in { @@ -543,6 +545,10 @@ let canFoldAsLoad = 1, PPC970_Unit = 2 in { def LD : DSForm_1<58, 0, (outs G8RC:$rD), (ins memrix:$src), "ld $rD, $src", LdStLD, [(set G8RC:$rD, (load ixaddr:$src))]>, isPPC64; +def LDtoc: DSForm_1<58, 0, (outs G8RC:$rD), (ins tocentry:$disp, G8RC:$reg), + "ld $rD, $disp($reg)", LdStLD, + [(set G8RC:$rD, + (PPCtoc_entry tglobaladdr:$disp, G8RC:$reg))]>, isPPC64; def LDX : XForm_1<31, 21, (outs G8RC:$rD), (ins memrr:$src), "ldx $rD, $src", LdStLD, [(set G8RC:$rD, (load xaddr:$src))]>, isPPC64; diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 759cdf0a486..3c32c4abfc6 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -53,6 +53,8 @@ def SDT_PPCTC_ret : SDTypeProfile<0, 2, [ SDTCisPtrTy<0>, SDTCisVT<1, i32> ]>; +def SDT_PPCnop : SDTypeProfile<0, 0, []>; + //===----------------------------------------------------------------------===// // PowerPC specific DAG Nodes. // @@ -85,6 +87,7 @@ def PPCfsel : SDNode<"PPCISD::FSEL", def PPChi : SDNode<"PPCISD::Hi", SDTIntBinOp, []>; def PPClo : SDNode<"PPCISD::Lo", SDTIntBinOp, []>; +def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>; def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>; def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>; @@ -111,6 +114,7 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; +def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInFlag, SDNPOutFlag]>; def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall, [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>; def PPCbctrl_Darwin : SDNode<"PPCISD::BCTRL_Darwin", SDTNone, @@ -306,6 +310,10 @@ def memrix : Operand { // memri where the imm is shifted 2 bits. let PrintMethod = "printMemRegImmShifted"; let MIOperandInfo = (ops i32imm:$imm, ptr_rc:$reg); } +def tocentry : Operand { + let PrintMethod = "printTOCEntryLabel"; + let MIOperandInfo = (ops i32imm:$imm); +} // PowerPC Predicate operand. 20 = (0<<5)|20 = always, CR0 is a dummy reg // that doesn't matter. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp index b124e607355..2eb2abc0474 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.cpp +++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp @@ -174,7 +174,8 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR, 0 }; - + + // 32-bit SVR4 calling convention. static const unsigned SVR4_CalleeSavedRegs[] = { PPC::R14, PPC::R15, PPC::R16, PPC::R17, PPC::R18, PPC::R19, @@ -200,7 +201,7 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, - PPC::LR, 0 + 0 }; // 64-bit Darwin calling convention. static const unsigned Darwin64_CalleeSavedRegs[] = { @@ -227,12 +228,41 @@ PPCRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { PPC::LR8, 0 }; + + // 64-bit SVR4 calling convention. + static const unsigned SVR4_64_CalleeSavedRegs[] = { + PPC::X14, PPC::X15, + PPC::X16, PPC::X17, PPC::X18, PPC::X19, + PPC::X20, PPC::X21, PPC::X22, PPC::X23, + PPC::X24, PPC::X25, PPC::X26, PPC::X27, + PPC::X28, PPC::X29, PPC::X30, PPC::X31, + + PPC::F14, PPC::F15, PPC::F16, PPC::F17, + PPC::F18, PPC::F19, PPC::F20, PPC::F21, + PPC::F22, PPC::F23, PPC::F24, PPC::F25, + PPC::F26, PPC::F27, PPC::F28, PPC::F29, + PPC::F30, PPC::F31, + + PPC::CR2, PPC::CR3, PPC::CR4, + + PPC::VRSAVE, + + PPC::V20, PPC::V21, PPC::V22, PPC::V23, + PPC::V24, PPC::V25, PPC::V26, PPC::V27, + PPC::V28, PPC::V29, PPC::V30, PPC::V31, + + PPC::CR2LT, PPC::CR2GT, PPC::CR2EQ, PPC::CR2UN, + PPC::CR3LT, PPC::CR3GT, PPC::CR3EQ, PPC::CR3UN, + PPC::CR4LT, PPC::CR4GT, PPC::CR4EQ, PPC::CR4UN, + + 0 + }; if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegs : Darwin32_CalleeSavedRegs; - - return SVR4_CalleeSavedRegs; + + return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegs : SVR4_CalleeSavedRegs; } const TargetRegisterClass* const* @@ -267,6 +297,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::GPRCRegClass, 0 }; + // 32-bit SVR4 calling convention. static const TargetRegisterClass * const SVR4_CalleeSavedRegClasses[] = { &PPC::GPRCRegClass,&PPC::GPRCRegClass, &PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass,&PPC::GPRCRegClass, @@ -295,7 +326,7 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, &PPC::CRBITRCRegClass, - &PPC::GPRCRegClass, 0 + 0 }; // 64-bit Darwin calling convention. @@ -327,12 +358,45 @@ PPCRegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const { &PPC::G8RCRegClass, 0 }; + + // 64-bit SVR4 calling convention. + static const TargetRegisterClass * const SVR4_64_CalleeSavedRegClasses[] = { + &PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + &PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass,&PPC::G8RCRegClass, + + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass,&PPC::F8RCRegClass, + &PPC::F8RCRegClass,&PPC::F8RCRegClass, + + &PPC::CRRCRegClass,&PPC::CRRCRegClass,&PPC::CRRCRegClass, + + &PPC::VRSAVERCRegClass, + + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + &PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass,&PPC::VRRCRegClass, + + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass,&PPC::CRBITRCRegClass, + &PPC::CRBITRCRegClass, + + 0 + }; if (Subtarget.isDarwinABI()) return Subtarget.isPPC64() ? Darwin64_CalleeSavedRegClasses : Darwin32_CalleeSavedRegClasses; - return SVR4_CalleeSavedRegClasses; + return Subtarget.isPPC64() ? SVR4_64_CalleeSavedRegClasses + : SVR4_CalleeSavedRegClasses; } // needsFP - Return true if the specified function should have a dedicated frame @@ -364,9 +428,9 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::R13); // Small Data Area pointer register } - // On PPC64, r13 is the thread pointer. Never allocate this register. Note - // that this is over conservative, as it also prevents allocation of R31 when - // the FP is not needed. + // On PPC64, r13 is the thread pointer. Never allocate this register. + // Note that this is over conservative, as it also prevents allocation of R31 + // when the FP is not needed. if (Subtarget.isPPC64()) { Reserved.set(PPC::R13); Reserved.set(PPC::R31); @@ -378,6 +442,11 @@ BitVector PPCRegisterInfo::getReservedRegs(const MachineFunction &MF) const { Reserved.set(PPC::X1); Reserved.set(PPC::X13); Reserved.set(PPC::X31); + + // The 64-bit SVR4 ABI reserves r2 for the TOC pointer. + if (Subtarget.isSVR4ABI()) { + Reserved.set(PPC::X2); + } } if (needsFP(MF)) @@ -911,7 +980,7 @@ void PPCRegisterInfo::determineFrameLayout(MachineFunction &MF) const { // don't have a frame pointer, calls, or dynamic alloca then we do not need // to adjust the stack pointer (we fit in the Red Zone). bool DisableRedZone = MF.getFunction()->hasFnAttr(Attribute::NoRedZone); - // FIXME SVR4 The SVR4 ABI has no red zone. + // FIXME SVR4 The 32-bit SVR4 ABI has no red zone. if (!DisableRedZone && FrameSize <= 224 && // Fits in red zone. !MFI->hasVarSizedObjects() && // No dynamic alloca. @@ -1006,7 +1075,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (!Subtarget.isSVR4ABI()) { return; } - + // Get callee saved register information. MachineFrameInfo *FFI = MF.getFrameInfo(); const std::vector &CSI = FFI->getCalleeSavedInfo(); @@ -1017,16 +1086,19 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) } unsigned MinGPR = PPC::R31; + unsigned MinG8R = PPC::X31; unsigned MinFPR = PPC::F31; unsigned MinVR = PPC::V31; bool HasGPSaveArea = false; + bool HasG8SaveArea = false; bool HasFPSaveArea = false; bool HasCRSaveArea = false; bool HasVRSAVESaveArea = false; bool HasVRSaveArea = false; SmallVector GPRegs; + SmallVector G8Regs; SmallVector FPRegs; SmallVector VRegs; @@ -1042,6 +1114,14 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) if (Reg < MinGPR) { MinGPR = Reg; } + } else if (RC == PPC::G8RCRegisterClass) { + HasG8SaveArea = true; + + G8Regs.push_back(CSI[i]); + + if (Reg < MinG8R) { + MinG8R = Reg; + } } else if (RC == PPC::F8RCRegisterClass) { HasFPSaveArea = true; @@ -1104,7 +1184,7 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) // General register save area starts right below the Floating-point // register save area. - if (HasGPSaveArea) { + if (HasGPSaveArea || HasG8SaveArea) { // Move general register save area spill slots down, taking into account // the size of the Floating-point register save area. for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { @@ -1113,7 +1193,22 @@ PPCRegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF) FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); } - LowerBound -= (31 - getRegisterNumbering(MinGPR) + 1) * 4; + // Move general register save area spill slots down, taking into account + // the size of the Floating-point register save area. + for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { + int FI = G8Regs[i].getFrameIdx(); + + FFI->setObjectOffset(FI, LowerBound + FFI->getObjectOffset(FI)); + } + + unsigned MinReg = std::min(getRegisterNumbering(MinGPR), + getRegisterNumbering(MinG8R)); + + if (Subtarget.isPPC64()) { + LowerBound -= (31 - MinReg + 1) * 8; + } else { + LowerBound -= (31 - MinReg + 1) * 4; + } } // The CR save area is below the general register save area. diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td index bac8e3aed8e..140f5df3e6a 100644 --- a/lib/Target/PowerPC/PPCRegisterInfo.td +++ b/lib/Target/PowerPC/PPCRegisterInfo.td @@ -280,7 +280,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32, let MethodBodies = [{ GPRCClass::iterator GPRCClass::allocation_order_begin(const MachineFunction &MF) const { - // In Linux, r2 is reserved for the OS. + // 32-bit SVR4 ABI: r2 is reserved for the OS. + // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. if (!MF.getTarget().getSubtarget().isDarwin()) return begin()+1; @@ -291,7 +292,7 @@ def GPRC : RegisterClass<"PPC", [i32], 32, // On PPC64, r13 is the thread pointer. Never allocate this register. // Note that this is overconservative, as it also prevents allocation of // R31 when the FP is not needed. - // When using the SVR4 ABI, r13 is reserved for the Small Data Area + // When using the 32-bit SVR4 ABI, r13 is reserved for the Small Data Area // pointer. const PPCSubtarget &Subtarget = MF.getTarget().getSubtarget(); @@ -318,6 +319,10 @@ def G8RC : RegisterClass<"PPC", [i64], 64, let MethodBodies = [{ G8RCClass::iterator G8RCClass::allocation_order_begin(const MachineFunction &MF) const { + // 64-bit SVR4 ABI: r2 is reserved for the TOC pointer. + if (!MF.getTarget().getSubtarget().isDarwin()) + return begin()+1; + return begin(); } G8RCClass::iterator @@ -372,4 +377,3 @@ def CRRC : RegisterClass<"PPC", [i32], 32, [CR0, CR1, CR5, CR6, CR7, CR2, def CTRRC : RegisterClass<"PPC", [i32], 32, [CTR]>; def CTRRC8 : RegisterClass<"PPC", [i64], 64, [CTR8]>; def VRSAVERC : RegisterClass<"PPC", [i32], 32, [VRSAVE]>; - diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h index 6787390f92f..02c8ad79bd3 100644 --- a/lib/Target/PowerPC/PPCSubtarget.h +++ b/lib/Target/PowerPC/PPCSubtarget.h @@ -138,8 +138,9 @@ public: /// getDarwinVers - Return the darwin version number, 8 = tiger, 9 = leopard. unsigned getDarwinVers() const { return DarwinVers; } - bool isDarwinABI() const { return isDarwin() || IsPPC64; } - bool isSVR4ABI() const { return !isDarwin() && !IsPPC64; } + bool isDarwinABI() const { return isDarwin(); } + bool isSVR4ABI() const { return !isDarwin(); } + }; } // End llvm namespace