Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168166 91177308-0d34-0410-b5e6-96231b3b80d8
Duncan Sands 2012-11-16 12:36:39 +00:00
parent c5519d3b26
commit dc7f174b5e
8 changed files with 254 additions and 13 deletions

View File

@@ -1982,8 +1982,8 @@ Tail call optimization
Tail call optimization, callee reusing the stack of the caller, is currently
supported on x86/x86-64 and PowerPC. It is performed if:
* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC
call convention).
* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC
calling convention) or ``cc 11`` (HiPE calling convention).
* The call is a tail call - in tail position (ret immediately follows call and
ret uses value of call or is void).
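
A minimal sketch of a qualifying call (illustrative only, with invented names; it is not part of this patch): caller and callee share the cc 11 convention and the ret immediately follows the call, so llc -tailcallopt may lower the call to a jump, as the new tests below check for the HiPE convention.

declare cc 11 void @next_state(i32, i32, i32)

define cc 11 void @dispatch(i32 %hp, i32 %p, i32 %event) nounwind {
entry:
  tail call cc 11 void @next_state(i32 %hp, i32 %p, i32 %event)
  ret void    ; ret immediately follows the call and is void
}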

View File

@@ -729,10 +729,10 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
target to use whatever tricks it wants to produce fast code for the
target, without having to conform to an externally specified ABI
(Application Binary Interface).
<a href="CodeGenerator.html#tailcallopt">Tail calls can only be optimized
when this or the GHC convention is used.</a> This calling convention
does not support varargs and requires the prototype of all callees to
exactly match the prototype of the function definition.</dd>
<a href="CodeGenerator.html#id80">Tail calls can only be optimized
when this, the GHC or the HiPE convention is used.</a> This calling
convention does not support varargs and requires the prototype of all
callees to exactly match the prototype of the function definition.</dd>
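
To make the last requirement concrete, here is a sketch with made-up names (not taken from the patch): the callee's declaration matches its definition exactly, both sides use fastcc, and neither is varargs, so the tail call is eligible for optimization when guaranteed tail call optimization is enabled.

declare fastcc i32 @helper(i32, i32)   ; prototype matches the definition exactly

define fastcc i32 @wrapper(i32 %a, i32 %b) nounwind {
entry:
  %r = tail call fastcc i32 @helper(i32 %a, i32 %b)
  ret i32 %r                           ; ret returns the value of the call
}
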
<dt><b>"<tt>coldcc</tt>" - The cold calling convention</b>:</dt>
<dd>This calling convention attempts to make code in the caller as efficient
@@ -749,7 +749,7 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
disabling callee save registers. This calling convention should not be
used lightly but only for specific situations such as an alternative to
the <em>register pinning</em> performance technique often used when
implementing functional programming languages.At the moment only X86
implementing functional programming languages. At the moment only X86
supports this convention and it has the following limitations:
<ul>
<li>On <em>X86-32</em> only supports up to 4 bit type parameters. No
@@ -758,10 +758,25 @@ define i32 @main() { <i>; i32()* </i>&nbsp;
6 floating point parameters.</li>
</ul>
This calling convention supports
<a href="CodeGenerator.html#tailcallopt">tail call optimization</a> but
<a href="CodeGenerator.html#id80">tail call optimization</a> but
requires both the caller and callee are using it.
</dd>
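
Concretely (an illustrative declaration with an invented name, not from the patch), an X86-32 cc 10 function is limited to four integer parameters, which the backend assigns to EBX, EBP, EDI and ESI, and may take no floating point parameters:

; Hypothetical GHC-convention entry point: at most four i32 parameters on X86-32.
declare cc 10 void @stg_entry(i32, i32, i32, i32)
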
<dt><b>"<tt>cc <em>11</em></tt>" - The HiPE calling convention</b>:</dt>
<dd>This calling convention has been implemented specifically for use by the
<a href="http://www.it.uu.se/research/group/hipe/">High-Performance Erlang
(HiPE)</a> compiler, <em>the</em> native code compiler of
<a href="http://www.erlang.org/download.shtml">Ericsson's Open Source
Erlang/OTP system</a>. It uses more registers for argument passing than
the ordinary C calling convention and defines no callee-saved registers.
The calling convention properly supports
<a href="CodeGenerator.html#id80">tail call optimization</a> but requires
that both the caller and the callee use it. It uses a <em>register
pinning</em> mechanism, similar to GHC's convention, for keeping
frequently accessed runtime components pinned to specific hardware
registers. At the moment only X86 supports this convention (both 32 and 64
bit).</dd>
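
A sketch of the resulting IR shape (the names are invented; the register mapping is the one defined in the X86 calling convention tables further down in this patch): HP and P travel as the leading arguments of every cc 11 call, so they stay pinned to ESI/EBP on X86-32 and R15/RBP on X86-64 across tail calls.

declare cc 11 void @handle_message(i32, i32, i32)

define cc 11 void @receive_loop(i32 %hp, i32 %p, i32 %msg) nounwind {
entry:
  ; HP and P are threaded through unchanged, keeping them in their
  ; dedicated registers across the tail call.
  tail call cc 11 void @handle_message(i32 %hp, i32 %p, i32 %msg)
  ret void
}
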
<dt><b>"<tt>cc &lt;<em>n</em>&gt;</tt>" - Numbered convention</b>:</dt>
<dd>Any calling convention may be specified by number, allowing
target-specific calling conventions to be used. Target specific calling

View File

@@ -47,6 +47,10 @@ namespace CallingConv {
// GHC - Calling convention used by the Glasgow Haskell Compiler (GHC).
GHC = 10,
// HiPE - Calling convention used by the High-Performance Erlang Compiler
// (HiPE).
HiPE = 11,
// Target - This is the start of the target-specific calling conventions,
// e.g. fastcall and thiscall on X86.
FirstTargetCC = 64,
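
For reference, the new enumerator is what the numbered cc 11 syntax in textual IR resolves to, just as cc 10 maps to GHC; a hypothetical declaration:

; "cc 11" selects CallingConv::HiPE (numeric value 11) defined above.
declare cc 11 void @hipe_stub(i64, i64)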

View File

@@ -103,6 +103,15 @@ def RetCC_Intel_OCL_BI : CallingConv<[
CCDelegateTo<RetCC_X86Common>
]>;
// X86-32 HiPE return-value convention.
def RetCC_X86_32_HiPE : CallingConv<[
// Promote all types to i32
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// Return: HP, P, VAL1, VAL2
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>>
]>;
// X86-64 C return-value convention.
def RetCC_X86_64_C : CallingConv<[
// The X86-64 calling convention always returns FP values in XMM0.
@@ -123,17 +132,30 @@ def RetCC_X86_Win64_C : CallingConv<[
CCDelegateTo<RetCC_X86_64_C>
]>;
// X86-64 HiPE return-value convention.
def RetCC_X86_64_HiPE : CallingConv<[
// Promote all types to i64
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
// Return: HP, P, VAL1, VAL2
CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>>
]>;
// This is the root return-value convention for the X86-32 backend.
def RetCC_X86_32 : CallingConv<[
// If FastCC, use RetCC_X86_32_Fast.
CCIfCC<"CallingConv::Fast", CCDelegateTo<RetCC_X86_32_Fast>>,
// If HiPE, use RetCC_X86_32_HiPE.
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_32_HiPE>>,
// Otherwise, use RetCC_X86_32_C.
CCDelegateTo<RetCC_X86_32_C>
]>;
// This is the root return-value convention for the X86-64 backend.
def RetCC_X86_64 : CallingConv<[
// HiPE uses RetCC_X86_64_HiPE
CCIfCC<"CallingConv::HiPE", CCDelegateTo<RetCC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<RetCC_X86_Win64_C>>,
@@ -291,6 +313,18 @@ def CC_X86_64_GHC : CallingConv<[
CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>>
]>;
def CC_X86_64_HiPE : CallingConv<[
// Promote i8/i16/i32 arguments to i64.
CCIfType<[i8, i16, i32], CCPromoteToType<i64>>,
// Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3
CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>,
// Integer/FP values get stored in stack slots that are 8 bytes in size and
// 8-byte aligned if there are no more registers to hold them.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>
]>;
//===----------------------------------------------------------------------===//
// X86 C Calling Convention
//===----------------------------------------------------------------------===//
@@ -422,6 +456,18 @@ def CC_X86_32_GHC : CallingConv<[
CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>>
]>;
def CC_X86_32_HiPE : CallingConv<[
// Promote i8/i16 arguments to i32.
CCIfType<[i8, i16], CCPromoteToType<i32>>,
// Pass in VM's registers: HP, P, ARG0, ARG1, ARG2
CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>,
// Integer/Float values get stored in stack slots that are 4 bytes in
// size and 4-byte aligned.
CCIfType<[i32, f32], CCAssignToStack<4, 4>>
]>;
//===----------------------------------------------------------------------===//
// X86 Root Argument Calling Conventions
//===----------------------------------------------------------------------===//
@@ -432,6 +478,7 @@ def CC_X86_32 : CallingConv<[
CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo<CC_X86_32_ThisCall>>,
CCIfCC<"CallingConv::Fast", CCDelegateTo<CC_X86_32_FastCC>>,
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_32_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_32_HiPE>>,
// Otherwise, drop to normal X86-32 CC
CCDelegateTo<CC_X86_32_C>
@@ -440,6 +487,7 @@ def CC_X86_32 : CallingConv<[
// This is the root argument convention for the X86-64 backend.
def CC_X86_64 : CallingConv<[
CCIfCC<"CallingConv::GHC", CCDelegateTo<CC_X86_64_GHC>>,
CCIfCC<"CallingConv::HiPE", CCDelegateTo<CC_X86_64_HiPE>>,
// Mingw64 and native Win64 use Win64 CC
CCIfSubtarget<"isTargetWin64()", CCDelegateTo<CC_X86_Win64_C>>,
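
Putting the x86-64 rules together (a hedged sketch with invented names, not part of the patch): the first six i64 values, here HP, P and four arguments, are assigned to R15, RBP, RSI, RDX, RCX and R8; any further argument falls through to an 8-byte, 8-byte-aligned stack slot, and the returned aggregate comes back in R15, RBP and RAX.

declare cc 11 {i64, i64, i64} @apply_bif(i64, i64, i64, i64, i64, i64, i64)

define cc 11 {i64, i64, i64} @call_bif(i64 %hp, i64 %p, i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4) nounwind {
entry:
  ; %hp, %p, %a0, %a1, %a2 and %a3 occupy R15, RBP, RSI, RDX, RCX and R8 per
  ; CC_X86_64_HiPE; %a4 is the seventh i64 and gets an 8-byte stack slot.
  %r = tail call cc 11 {i64, i64, i64} @apply_bif(i64 %hp, i64 %p, i64 %a0, i64 %a1, i64 %a2, i64 %a3, i64 %a4)
  ; The three returned values land in R15, RBP and RAX per RetCC_X86_64_HiPE.
  ret {i64, i64, i64} %r
}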

View File

@@ -1822,7 +1822,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain,
/// IsTailCallConvention - Return true if the calling convention is one that
/// supports tail call optimization.
static bool IsTailCallConvention(CallingConv::ID CC) {
return (CC == CallingConv::Fast || CC == CallingConv::GHC);
return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
CC == CallingConv::HiPE);
}
bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const {
@@ -1909,7 +1910,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
bool IsWin64 = Subtarget->isTargetWin64();
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc or ghc");
"Var args not supported with calling convention fastcc, ghc or hipe");
// Assign locations to all of the incoming arguments.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -2254,7 +2255,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
}
assert(!(isVarArg && IsTailCallConvention(CallConv)) &&
"Var args not supported with calling convention fastcc or ghc");
"Var args not supported with calling convention fastcc, ghc or hipe");
// Analyze operands of the call, assigning locations to each operand.
SmallVector<CCValAssign, 16> ArgLocs;
@@ -3119,6 +3120,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv,
return TailCallOpt;
case CallingConv::GHC:
return TailCallOpt;
case CallingConv::HiPE:
return TailCallOpt;
}
}
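
In other words, like fastcc and the GHC convention, a HiPE callee pops its own stack arguments when guaranteed tail call optimization is enabled (llc -tailcallopt). A hedged x86-32 sketch with invented names: the sixth i32 below does not fit in ESI, EBP, EAX, EDX or ECX, so it is passed in a 4-byte stack slot that the callee's ret cleans up.

; Hypothetical: five register parameters plus one stack parameter.
define cc 11 void @six_args(i32 %hp, i32 %p, i32 %a0, i32 %a1, i32 %a2, i32 %a3) nounwind {
entry:
  ; %a3 is passed on the stack; under -tailcallopt this function returns with
  ; an x86 ret-with-immediate that pops its own stack-argument area.
  ret void
}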

View File

@@ -190,6 +190,11 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind)
return &X86::GR64_TCW64RegClass;
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return &X86::GR64_TCRegClass;
const Function *F = MF.getFunction();
bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false);
if (hasHipeCC)
return &X86::GR32RegClass;
return &X86::GR32_TCRegClass;
}
}
@@ -230,6 +235,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
bool callsEHReturn = false;
bool ghcCall = false;
bool oclBiCall = false;
bool hipeCall = false;
bool HasAVX = TM.getSubtarget<X86Subtarget>().hasAVX();
if (MF) {
@@ -237,9 +243,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
const Function *F = MF->getFunction();
ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false);
oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false);
hipeCall = (F ? F->getCallingConv() == CallingConv::HiPE : false);
}
if (ghcCall)
if (ghcCall || hipeCall)
return CSR_NoRegs_SaveList;
if (oclBiCall) {
if (HasAVX && IsWin64)
@@ -273,7 +280,7 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const {
if (!HasAVX && !IsWin64 && Is64Bit)
return CSR_64_Intel_OCL_BI_RegMask;
}
if (CC == CallingConv::GHC)
if (CC == CallingConv::GHC || CC == CallingConv::HiPE)
return CSR_NoRegs_RegMask;
if (!Is64Bit)
return CSR_32_RegMask;
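
The practical consequence (sketched below with invented names; not part of the patch) is that no register survives a cc 11 call: any value the caller still needs afterwards must be spilled by the caller, because the callee saves and restores nothing.

declare cc 11 void @gc_minor(i32, i32)

define cc 11 i32 @keep_live(i32 %hp, i32 %p, i32 %acc) nounwind {
entry:
  ; %acc is live across the call; since HiPE functions have no callee-saved
  ; registers (CSR_NoRegs above), the generated code must spill %acc around
  ; the call instead of leaving it in a register.
  call cc 11 void @gc_minor(i32 %hp, i32 %p)
  %sum = add i32 %acc, 1
  ret i32 %sum
}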

View File

@@ -0,0 +1,77 @@
; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu | FileCheck %s
; Check the HiPE calling convention works (x86-32)
define void @zap(i32 %a, i32 %b) nounwind {
entry:
; CHECK: movl 40(%esp), %eax
; CHECK-NEXT: movl 44(%esp), %edx
; CHECK-NEXT: movl $8, %ecx
; CHECK-NEXT: calll addfour
%0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8)
%res = extractvalue {i32, i32, i32} %0, 2
; CHECK: movl %eax, 16(%esp)
; CHECK-NEXT: movl $2, 12(%esp)
; CHECK-NEXT: movl $1, 8(%esp)
; CHECK: calll foo
tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind
ret void
}
define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind {
entry:
; CHECK: addl %edx, %eax
; CHECK-NEXT: addl %ecx, %eax
%0 = add i32 %x, %y
%1 = add i32 %0, %z
; CHECK: ret
%res = insertvalue {i32, i32, i32} undef, i32 %1, 2
ret {i32, i32, i32} %res
}
define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind {
entry:
; CHECK: movl %esi, 16(%esp)
; CHECK-NEXT: movl %ebp, 12(%esp)
; CHECK-NEXT: movl %eax, 8(%esp)
; CHECK-NEXT: movl %edx, 4(%esp)
; CHECK-NEXT: movl %ecx, (%esp)
%hp_var = alloca i32
%p_var = alloca i32
%arg0_var = alloca i32
%arg1_var = alloca i32
%arg2_var = alloca i32
store i32 %hp, i32* %hp_var
store i32 %p, i32* %p_var
store i32 %arg0, i32* %arg0_var
store i32 %arg1, i32* %arg1_var
store i32 %arg2, i32* %arg2_var
; CHECK: movl 4(%esp), %edx
; CHECK-NEXT: movl 8(%esp), %eax
; CHECK-NEXT: movl 12(%esp), %ebp
; CHECK-NEXT: movl 16(%esp), %esi
%0 = load i32* %hp_var
%1 = load i32* %p_var
%2 = load i32* %arg0_var
%3 = load i32* %arg1_var
%4 = load i32* %arg2_var
; CHECK: jmp bar
tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind
ret void
}
define cc 11 void @baz() nounwind {
%tmp_clos = load i32* @clos
%tmp_clos2 = inttoptr i32 %tmp_clos to i32*
%indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)*
; CHECK: movl $42, %eax
; CHECK-NEXT: jmpl *clos
tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind
ret void
}
@clos = external constant i32
declare cc 11 void @bar(i32, i32, i32, i32, i32)

View File

@@ -0,0 +1,87 @@
; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu | FileCheck %s
; Check the HiPE calling convention works (x86-64)
define void @zap(i64 %a, i64 %b) nounwind {
entry:
; CHECK: movq %rsi, %rax
; CHECK-NEXT: movq %rdi, %rsi
; CHECK-NEXT: movq %rax, %rdx
; CHECK-NEXT: movl $8, %ecx
; CHECK-NEXT: movl $9, %r8d
; CHECK-NEXT: callq addfour
%0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9)
%res = extractvalue {i64, i64, i64} %0, 2
; CHECK: movl $1, %edx
; CHECK-NEXT: movl $2, %ecx
; CHECK-NEXT: movl $3, %r8d
; CHECK-NEXT: movq %rax, %r9
; CHECK: callq foo
tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind
ret void
}
define cc 11 {i64, i64, i64} @addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind {
entry:
; CHECK: leaq (%rsi,%rdx), %rax
; CHECK-NEXT: addq %rcx, %rax
; CHECK-NEXT: addq %r8, %rax
%0 = add i64 %x, %y
%1 = add i64 %0, %z
%2 = add i64 %1, %w
; CHECK: ret
%res = insertvalue {i64, i64, i64} undef, i64 %2, 2
ret {i64, i64, i64} %res
}
define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind {
entry:
; CHECK: movq %r15, 40(%rsp)
; CHECK-NEXT: movq %rbp, 32(%rsp)
; CHECK-NEXT: movq %rsi, 24(%rsp)
; CHECK-NEXT: movq %rdx, 16(%rsp)
; CHECK-NEXT: movq %rcx, 8(%rsp)
; CHECK-NEXT: movq %r8, (%rsp)
%hp_var = alloca i64
%p_var = alloca i64
%arg0_var = alloca i64
%arg1_var = alloca i64
%arg2_var = alloca i64
%arg3_var = alloca i64
store i64 %hp, i64* %hp_var
store i64 %p, i64* %p_var
store i64 %arg0, i64* %arg0_var
store i64 %arg1, i64* %arg1_var
store i64 %arg2, i64* %arg2_var
store i64 %arg3, i64* %arg3_var
; CHECK: movq 8(%rsp), %rcx
; CHECK-NEXT: movq 16(%rsp), %rdx
; CHECK-NEXT: movq 24(%rsp), %rsi
; CHECK-NEXT: movq 32(%rsp), %rbp
; CHECK-NEXT: movq 40(%rsp), %r15
%0 = load i64* %hp_var
%1 = load i64* %p_var
%2 = load i64* %arg0_var
%3 = load i64* %arg1_var
%4 = load i64* %arg2_var
%5 = load i64* %arg3_var
; CHECK: jmp bar
tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind
ret void
}
define cc 11 void @baz() nounwind {
%tmp_clos = load i64* @clos
%tmp_clos2 = inttoptr i64 %tmp_clos to i64*
%indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)*
; CHECK: movl $42, %esi
; CHECK-NEXT: jmpq *(%rax)
tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind
ret void
}
@clos = external constant i64
declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)