Implement call lowering without parameters on AIX

Summary:
This patch implements call lowering for calls without parameters
on AIX as initial support.

Reviewers: sfertile, hubert.reinterpretcast, aheejin, efriedma

Differential Revision: https://reviews.llvm.org/D61948

llvm-svn: 361669
This commit is contained in:
Jason Liu 2019-05-24 20:54:35 +00:00
parent 905dc0f2a9
commit e973163b36
10 changed files with 168 additions and 19 deletions

View File

@ -1288,7 +1288,7 @@ def : InstRW<[P9_BR_2C, DISP_1C, DISP_1C],
(instregex "BCCTR(L)?(8)?(n)?$"),
(instregex "BD(N)?Z(8|A|Am|Ap|m|p)?$"),
(instregex "BD(N)?ZL(A|Am|Ap|R|R8|RL|RLm|RLp|Rm|Rp|m|p)?$"),
(instregex "BL(_TLS)?$"),
(instregex "BL(_TLS|_NOP)?$"),
(instregex "BL8(_TLS|_NOP|_NOP_TLS|_TLS_)?$"),
(instregex "BLA(8|8_NOP)?$"),
(instregex "BLR(8|L)?$"),

View File

@ -306,6 +306,13 @@ def CSR_SVR432_Altivec : CalleeSavedRegs<(add CSR_SVR432, CSR_Altivec)>;
def CSR_SVR432_SPE : CalleeSavedRegs<(add CSR_SVR432_COMM, CSR_SPE)>;
// Callee-saved register set used for 32-bit AIX: GPRs R13-R31, FPRs F14-F31
// and condition-register fields CR2-CR4. No vector registers are listed.
def CSR_AIX32 : CalleeSavedRegs<(add R13, R14, R15, R16, R17, R18, R19, R20,
R21, R22, R23, R24, R25, R26, R27, R28,
R29, R30, R31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
def CSR_Darwin64 : CalleeSavedRegs<(add X13, X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
@ -322,6 +329,13 @@ def CSR_SVR464 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
// Callee-saved register set used for 64-bit AIX: GPRs X14-X31, FPRs F14-F31
// and condition-register fields CR2-CR4. No vector registers are listed.
def CSR_AIX64 : CalleeSavedRegs<(add X14, X15, X16, X17, X18, X19, X20,
X21, X22, X23, X24, X25, X26, X27, X28,
X29, X30, X31, F14, F15, F16, F17, F18,
F19, F20, F21, F22, F23, F24, F25, F26,
F27, F28, F29, F30, F31, CR2, CR3, CR4
)>;
// CSRs that are handled by prologue, epilogue.
def CSR_SRV464_TLS_PE : CalleeSavedRegs<(add)>;

View File

@ -71,10 +71,10 @@ static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
}
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
if (STI.isDarwinABI() || STI.isPPC64())
if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
// SVR4 ABI:
// 32-bit SVR4 ABI:
return 8;
}

View File

@ -5160,18 +5160,23 @@ SDValue PPCTargetLowering::FinishCall(
}
// Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in a different module and
// SVR4 or the AIX ABI.
// At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub
// function which saves the current TOC, loads the TOC of the callee and
// branches to the callee. The NOP will be replaced with a load instruction
// which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged.
// same TOC), the NOP will remain unchanged, or become some other NOP.
MachineFunction &MF = DAG.getMachineFunction();
if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64() &&
!isPatchPoint) {
if (!isTailCall && !isPatchPoint &&
((Subtarget.isSVR4ABI() && Subtarget.isPPC64()) ||
Subtarget.isAIXABI())) {
if (CallOpc == PPCISD::BCTRL) {
if (Subtarget.isAIXABI())
report_fatal_error("Indirect call on AIX is not implemented.");
// This is a call through a function pointer.
// Restore the caller TOC from the save area into R2.
// See PrepareCall() for more information about calls through function
@ -5268,16 +5273,20 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
!isTailCall)
Callee = LowerGlobalAddress(Callee, DAG);
if (Subtarget.isSVR4ABI()) {
if (Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals, CS);
else
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals, CS);
}
if (Subtarget.isSVR4ABI() && Subtarget.isPPC64())
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals, CS);
if (Subtarget.isSVR4ABI())
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals, CS);
if (Subtarget.isAIXABI())
return LowerCall_AIX(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
dl, DAG, InVals, CS);
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
isTailCall, isPatchPoint, Outs, OutVals, Ins,
@ -6567,6 +6576,67 @@ SDValue PPCTargetLowering::LowerCall_Darwin(
NumBytes, Ins, InVals, CS);
}
/// Lower an outgoing call for the AIX ABI.
///
/// Only direct, non-variadic, non-tail, non-patchpoint calls that pass no
/// arguments are handled here; every other form is rejected with
/// report_fatal_error. The call frame reserves the linkage area plus eight
/// pointer-sized slots, and the call node itself is produced by FinishCall().
SDValue PPCTargetLowering::LowerCall_AIX(
    SDValue Chain, SDValue Callee, CallingConv::ID CallConv, bool isVarArg,
    bool isTailCall, bool isPatchPoint,
    const SmallVectorImpl<ISD::OutputArg> &Outs,
    const SmallVectorImpl<SDValue> &OutVals,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
    ImmutableCallSite CS) const {
  assert((CallConv == CallingConv::C || CallConv == CallingConv::Fast) &&
         "Unimplemented calling convention!");

  // Reject the call shapes that this initial implementation does not cover.
  if (isVarArg || isPatchPoint)
    report_fatal_error("This call type is unimplemented on AIX.");

  // The pointer type selects the width of each reserved register slot.
  const EVT PointerVT = getPointerTy(DAG.getDataLayout());
  const unsigned SlotSize = (PointerVT == MVT::i64) ? 8 : 4;

  if (!Outs.empty())
    report_fatal_error("Call lowering with parameters is not implemented "
                       "on AIX yet.");

  // The outgoing frame starts with the linkage area (24/48 bytes on XCOFF:
  // [SP][CR][LR][2 x reserved][TOC]). On top of that we always reserve room
  // for eight GPR argument registers: a variadic callee's prolog may spill
  // them to the stack for va_start, and the caller cannot tell whether that
  // will happen, so the space is reserved conservatively.
  const unsigned FrameBytes =
      Subtarget.getFrameLowering()->getLinkageSize() + 8 * SlotSize;

  // Open the call sequence. The stack adjustment it stands for is eliminated
  // later by the prolog/epilog inserter pass.
  SDValue CallSeqStart = DAG.getCALLSEQ_START(Chain, FrameBytes, 0, dl);
  Chain = CallSeqStart;

  // Only callees named by a global address or an external symbol are
  // supported.
  const bool IsDirectCallee =
      isFunctionGlobalAddress(Callee) || isa<ExternalSymbolSDNode>(Callee);
  if (!IsDirectCallee)
    report_fatal_error("Handling of indirect call is unimplemented!");

  // With no parameters there are no argument registers to pass and no glue
  // value to thread through.
  SmallVector<std::pair<unsigned, SDValue>, 8> ArgRegs;
  SDValue Glue;

  if (isTailCall)
    report_fatal_error("Handling of tail call is unimplemented!");
  const int SPDiff = 0;

  return FinishCall(CallConv, dl, isTailCall, isVarArg, isPatchPoint,
                    /* unused except on PPC64 ELFv1 */ false, DAG,
                    ArgRegs, Glue, Chain, CallSeqStart, Callee, SPDiff,
                    FrameBytes, Ins, InVals, CS);
}
bool
PPCTargetLowering::CanLowerReturn(CallingConv::ID CallConv,
MachineFunction &MF, bool isVarArg,

View File

@ -160,7 +160,7 @@ namespace llvm {
/// CALL - A direct function call.
/// CALL_NOP is a call with the special NOP which follows 64-bit
/// SVR4 calls.
/// SVR4 calls and 32-bit/64-bit AIX calls.
CALL, CALL_NOP,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
@ -1120,6 +1120,15 @@ namespace llvm {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const;
SDValue LowerCall_AIX(SDValue Chain, SDValue Callee,
CallingConv::ID CallConv, bool isVarArg,
bool isTailCall, bool isPatchPoint,
const SmallVectorImpl<ISD::OutputArg> &Outs,
const SmallVectorImpl<SDValue> &OutVals,
const SmallVectorImpl<ISD::InputArg> &Ins,
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals,
ImmutableCallSite CS) const;
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;

View File

@ -1469,6 +1469,9 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR] in {
def BCLn : BForm_4<16, 4, 0, 1, (outs),
(ins crbitrc:$bi, condbrtarget:$dst),
"bcl 4, $bi, $dst">;
// Direct call whose assembly text is a "bl" to $func followed by a "nop" on
// the next line, so the branch-and-link and the trailing nop stay together as
// a single instruction definition.
def BL_NOP : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func),
"bl $func\n\tnop", IIC_BrB, []>;
}
}
let Uses = [CTR, RM] in {
@ -3029,6 +3032,9 @@ def : Pat<(and (rotl i32:$in, i32:$sh), maskimm32:$imm),
// Calls
def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
(BL tglobaladdr:$dst)>;
def : Pat<(PPCcall_nop (i32 tglobaladdr:$dst)),
(BL_NOP tglobaladdr:$dst)>;
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;

View File

@ -228,6 +228,10 @@ PPCRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
: CSR_Darwin64_RegMask)
: (Subtarget.hasAltivec() ? CSR_Darwin32_Altivec_RegMask
: CSR_Darwin32_RegMask);
if (Subtarget.isAIXABI()) {
assert(!Subtarget.hasAltivec() && "Altivec is not implemented on AIX yet.");
return TM.isPPC64() ? CSR_AIX64_RegMask : CSR_AIX32_RegMask;
}
if (CC == CallingConv::Cold) {
return TM.isPPC64() ? (Subtarget.hasAltivec() ? CSR_SVR64_ColdCC_Altivec_RegMask

View File

@ -314,7 +314,8 @@ public:
bool isTargetLinux() const { return TargetTriple.isOSLinux(); }
bool isDarwinABI() const { return isTargetMachO() || isDarwin(); }
bool isSVR4ABI() const { return !isDarwinABI(); }
bool isAIXABI() const { return TargetTriple.isOSAIX(); }
bool isSVR4ABI() const { return !isDarwinABI() && !isAIXABI(); }
bool isELFv2ABI() const;
/// Originally, this function return hasISEL(). Now we always enable it,

View File

@ -173,6 +173,11 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return GV && GV->isStrongDefinitionForLinker();
}
// Due to the AIX linkage model, any global with default visibility is
// considered non-local.
if (TT.isOSBinFormatXCOFF())
return false;
assert(TT.isOSBinFormatELF() || TT.isOSBinFormatWasm());
assert(RM != Reloc::DynamicNoPIC);

View File

@ -0,0 +1,40 @@
; RUN: llc -mtriple powerpc-ibm-aix-xcoff -stop-after=machine-cp < %s | \
; RUN: FileCheck --check-prefix=32BIT %s
; RUN: llc -mtriple powerpc64-ibm-aix-xcoff -stop-after=machine-cp < %s | \
; RUN: FileCheck --check-prefix=64BIT %s
declare void @foo(...)
; Calls the externally declared @foo with no arguments. The checks expect the
; call-with-nop forms (BL_NOP in 32-bit mode, BL8_NOP in 64-bit mode) wrapped
; in a call frame of 56 bytes (32-bit) or 112 bytes (64-bit).
define void @test_call() {
entry:
; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
; 32BIT: BL_NOP @foo, csr_aix32, implicit-def dead $lr, implicit $rm, implicit-def $r1
; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT: BL8_NOP @foo, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit-def $r1
; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
call void bitcast (void (...)* @foo to void ()*)()
ret void
}
; Hidden-visibility function defined in this module; it is the callee used by
; @test_local_call.
define hidden void @foo_local() {
entry:
ret void
}
; Calls the hidden, module-local @foo_local. The checks expect a plain BL
; (32-bit mode) or BL8 (64-bit mode) with no trailing nop, using the same
; 56-byte / 112-byte call frames as the external-call test.
define void @test_local_call() {
entry:
; 32BIT: ADJCALLSTACKDOWN 56, 0, implicit-def dead $r1, implicit $r1
; 32BIT: BL @foo_local, csr_aix32, implicit-def dead $lr, implicit $rm, implicit-def $r1
; 32BIT: ADJCALLSTACKUP 56, 0, implicit-def dead $r1, implicit $r1
; 64BIT: ADJCALLSTACKDOWN 112, 0, implicit-def dead $r1, implicit $r1
; 64BIT: BL8 @foo_local, csr_aix64, implicit-def dead $lr8, implicit $rm, implicit-def $r1
; 64BIT: ADJCALLSTACKUP 112, 0, implicit-def dead $r1, implicit $r1
call void @foo_local()
ret void
}