From dc7f174b5e049172f085ff5957f58998bdc446a4 Mon Sep 17 00:00:00 2001 From: Duncan Sands Date: Fri, 16 Nov 2012 12:36:39 +0000 Subject: [PATCH] Add the Erlang/HiPE calling convention, patch by Yiannis Tsiouris. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168166 91177308-0d34-0410-b5e6-96231b3b80d8 --- docs/CodeGenerator.rst | 4 +- docs/LangRef.html | 27 +++++++--- include/llvm/CallingConv.h | 4 ++ lib/Target/X86/X86CallingConv.td | 48 +++++++++++++++++ lib/Target/X86/X86ISelLowering.cpp | 9 ++-- lib/Target/X86/X86RegisterInfo.cpp | 11 +++- test/CodeGen/X86/hipe-cc.ll | 77 ++++++++++++++++++++++++++ test/CodeGen/X86/hipe-cc64.ll | 87 ++++++++++++++++++++++++++++++ 8 files changed, 254 insertions(+), 13 deletions(-) create mode 100644 test/CodeGen/X86/hipe-cc.ll create mode 100644 test/CodeGen/X86/hipe-cc64.ll diff --git a/docs/CodeGenerator.rst b/docs/CodeGenerator.rst index 21af969d42a..104b848ebf2 100644 --- a/docs/CodeGenerator.rst +++ b/docs/CodeGenerator.rst @@ -1982,8 +1982,8 @@ Tail call optimization Tail call optimization, callee reusing the stack of the caller, is currently supported on x86/x86-64 and PowerPC. It is performed if: -* Caller and callee have the calling convention ``fastcc`` or ``cc 10`` (GHC - call convention). +* Caller and callee have the calling convention ``fastcc``, ``cc 10`` (GHC + calling convention) or ``cc 11`` (HiPE calling convention). * The call is a tail call - in tail position (ret immediately follows call and ret uses value of call or is void). diff --git a/docs/LangRef.html b/docs/LangRef.html index b4dd976306f..b1ed4e6e48a 100644 --- a/docs/LangRef.html +++ b/docs/LangRef.html @@ -729,10 +729,10 @@ define i32 @main() { ; i32()*   target to use whatever tricks it wants to produce fast code for the target, without having to conform to an externally specified ABI (Application Binary Interface). - Tail calls can only be optimized - when this or the GHC convention is used. 
This calling convention - does not support varargs and requires the prototype of all callees to - exactly match the prototype of the function definition. + Tail calls can only be optimized + when this, the GHC or the HiPE convention is used. This calling + convention does not support varargs and requires the prototype of all + callees to exactly match the prototype of the function definition.
"coldcc" - The cold calling convention:
This calling convention attempts to make code in the caller as efficient @@ -749,7 +749,7 @@ define i32 @main() { ; i32()*   disabling callee save registers. This calling convention should not be used lightly but only for specific situations such as an alternative to the register pinning performance technique often used when - implementing functional programming languages.At the moment only X86 + implementing functional programming languages. At the moment only X86 supports this convention and it has the following limitations: This calling convention supports - tail call optimization but + tail call optimization but requires both the caller and callee are using it.
+
"cc 11" - The HiPE calling convention:
+
This calling convention has been implemented specifically for use by the + High-Performance Erlang + (HiPE) compiler, the native code compiler of + Ericsson's Open Source + Erlang/OTP system. It uses more registers for argument passing than + the ordinary C calling convention and defines no callee-saved registers. + The calling convention properly supports + tail call optimization but requires + that both the caller and the callee use it. It uses a register + pinning mechanism, similar to GHC's convention, for keeping + frequently accessed runtime components pinned to specific hardware + registers. At the moment only X86 supports this convention (both 32 and 64 + bit).
+
"cc <n>" - Numbered convention:
Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific calling diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h index 053f4eb326f..699cea331ca 100644 --- a/include/llvm/CallingConv.h +++ b/include/llvm/CallingConv.h @@ -47,6 +47,10 @@ namespace CallingConv { // GHC - Calling convention used by the Glasgow Haskell Compiler (GHC). GHC = 10, + // HiPE - Calling convention used by the High-Performance Erlang Compiler + // (HiPE). + HiPE = 11, + // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. FirstTargetCC = 64, diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td index 6786756c7fa..947e7d78495 100644 --- a/lib/Target/X86/X86CallingConv.td +++ b/lib/Target/X86/X86CallingConv.td @@ -103,6 +103,15 @@ def RetCC_Intel_OCL_BI : CallingConv<[ CCDelegateTo ]>; +// X86-32 HiPE return-value convention. +def RetCC_X86_32_HiPE : CallingConv<[ + // Promote all types to i32 + CCIfType<[i8, i16], CCPromoteToType>, + + // Return: HP, P, VAL1, VAL2 + CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX]>> +]>; + // X86-64 C return-value convention. def RetCC_X86_64_C : CallingConv<[ // The X86-64 calling convention always returns FP values in XMM0. @@ -123,17 +132,30 @@ def RetCC_X86_Win64_C : CallingConv<[ CCDelegateTo ]>; +// X86-64 HiPE return-value convention. +def RetCC_X86_64_HiPE : CallingConv<[ + // Promote all types to i64 + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Return: HP, P, VAL1, VAL2 + CCIfType<[i64], CCAssignToReg<[R15, RBP, RAX, RDX]>> +]>; // This is the root return-value convention for the X86-32 backend. def RetCC_X86_32 : CallingConv<[ // If FastCC, use RetCC_X86_32_Fast. CCIfCC<"CallingConv::Fast", CCDelegateTo>, + // If HiPE, use RetCC_X86_32_HiPE. + CCIfCC<"CallingConv::HiPE", CCDelegateTo>, + // Otherwise, use RetCC_X86_32_C. 
CCDelegateTo ]>; // This is the root return-value convention for the X86-64 backend. def RetCC_X86_64 : CallingConv<[ + // HiPE uses RetCC_X86_64_HiPE + CCIfCC<"CallingConv::HiPE", CCDelegateTo>, // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, @@ -291,6 +313,18 @@ def CC_X86_64_GHC : CallingConv<[ CCAssignToReg<[XMM1, XMM2, XMM3, XMM4, XMM5, XMM6]>>> ]>; +def CC_X86_64_HiPE : CallingConv<[ + // Promote i8/i16/i32 arguments to i64. + CCIfType<[i8, i16, i32], CCPromoteToType>, + + // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2, ARG3 + CCIfType<[i64], CCAssignToReg<[R15, RBP, RSI, RDX, RCX, R8]>>, + + // Integer/FP values get stored in stack slots that are 8 bytes in size and + // 8-byte aligned if there are no more registers to hold them. + CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>> +]>; + //===----------------------------------------------------------------------===// // X86 C Calling Convention //===----------------------------------------------------------------------===// @@ -422,6 +456,18 @@ def CC_X86_32_GHC : CallingConv<[ CCIfType<[i32], CCAssignToReg<[EBX, EBP, EDI, ESI]>> ]>; +def CC_X86_32_HiPE : CallingConv<[ + // Promote i8/i16 arguments to i32. + CCIfType<[i8, i16], CCPromoteToType>, + + // Pass in VM's registers: HP, P, ARG0, ARG1, ARG2 + CCIfType<[i32], CCAssignToReg<[ESI, EBP, EAX, EDX, ECX]>>, + + // Integer/Float values get stored in stack slots that are 4 bytes in + // size and 4-byte aligned. 
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>> +]>; + //===----------------------------------------------------------------------===// // X86 Root Argument Calling Conventions //===----------------------------------------------------------------------===// @@ -432,6 +478,7 @@ def CC_X86_32 : CallingConv<[ CCIfCC<"CallingConv::X86_ThisCall", CCDelegateTo>, CCIfCC<"CallingConv::Fast", CCDelegateTo>, CCIfCC<"CallingConv::GHC", CCDelegateTo>, + CCIfCC<"CallingConv::HiPE", CCDelegateTo>, // Otherwise, drop to normal X86-32 CC CCDelegateTo @@ -440,6 +487,7 @@ def CC_X86_32 : CallingConv<[ // This is the root argument convention for the X86-64 backend. def CC_X86_64 : CallingConv<[ CCIfCC<"CallingConv::GHC", CCDelegateTo>, + CCIfCC<"CallingConv::HiPE", CCDelegateTo>, // Mingw64 and native Win64 use Win64 CC CCIfSubtarget<"isTargetWin64()", CCDelegateTo>, diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 80dd9ef613b..3192a43bdb4 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1822,7 +1822,8 @@ CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, /// IsTailCallConvention - Return true if the calling convention is one that /// supports tail call optimization. static bool IsTailCallConvention(CallingConv::ID CC) { - return (CC == CallingConv::Fast || CC == CallingConv::GHC); + return (CC == CallingConv::Fast || CC == CallingConv::GHC || + CC == CallingConv::HiPE); } bool X86TargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { @@ -1909,7 +1910,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain, bool IsWin64 = Subtarget->isTargetWin64(); assert(!(isVarArg && IsTailCallConvention(CallConv)) && - "Var args not supported with calling convention fastcc or ghc"); + "Var args not supported with calling convention fastcc, ghc or hipe"); // Assign locations to all of the incoming arguments. 
SmallVector ArgLocs; @@ -2254,7 +2255,7 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, } assert(!(isVarArg && IsTailCallConvention(CallConv)) && - "Var args not supported with calling convention fastcc or ghc"); + "Var args not supported with calling convention fastcc, ghc or hipe"); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; @@ -3119,6 +3120,8 @@ bool X86::isCalleePop(CallingConv::ID CallingConv, return TailCallOpt; case CallingConv::GHC: return TailCallOpt; + case CallingConv::HiPE: + return TailCallOpt; } } diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index 73ac7477427..11f2d7aa707 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -190,6 +190,11 @@ X86RegisterInfo::getPointerRegClass(const MachineFunction &MF, unsigned Kind) return &X86::GR64_TCW64RegClass; if (TM.getSubtarget().is64Bit()) return &X86::GR64_TCRegClass; + + const Function *F = MF.getFunction(); + bool hasHipeCC = (F ? F->getCallingConv() == CallingConv::HiPE : false); + if (hasHipeCC) + return &X86::GR32RegClass; return &X86::GR32_TCRegClass; } } @@ -230,6 +235,7 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { bool callsEHReturn = false; bool ghcCall = false; bool oclBiCall = false; + bool hipeCall = false; bool HasAVX = TM.getSubtarget().hasAVX(); if (MF) { @@ -237,9 +243,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { const Function *F = MF->getFunction(); ghcCall = (F ? F->getCallingConv() == CallingConv::GHC : false); oclBiCall = (F ? F->getCallingConv() == CallingConv::Intel_OCL_BI : false); + hipeCall = (F ? 
F->getCallingConv() == CallingConv::HiPE : false); } - if (ghcCall) + if (ghcCall || hipeCall) return CSR_NoRegs_SaveList; if (oclBiCall) { if (HasAVX && IsWin64) @@ -273,7 +280,7 @@ X86RegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { if (!HasAVX && !IsWin64 && Is64Bit) return CSR_64_Intel_OCL_BI_RegMask; } - if (CC == CallingConv::GHC) + if (CC == CallingConv::GHC || CC == CallingConv::HiPE) return CSR_NoRegs_RegMask; if (!Is64Bit) return CSR_32_RegMask; diff --git a/test/CodeGen/X86/hipe-cc.ll b/test/CodeGen/X86/hipe-cc.ll new file mode 100644 index 00000000000..0de44915291 --- /dev/null +++ b/test/CodeGen/X86/hipe-cc.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=4 -mtriple=i686-linux-gnu | FileCheck %s + +; Check the HiPE calling convention works (x86-32) + +define void @zap(i32 %a, i32 %b) nounwind { +entry: + ; CHECK: movl 40(%esp), %eax + ; CHECK-NEXT: movl 44(%esp), %edx + ; CHECK-NEXT: movl $8, %ecx + ; CHECK-NEXT: calll addfour + %0 = call cc 11 {i32, i32, i32} @addfour(i32 undef, i32 undef, i32 %a, i32 %b, i32 8) + %res = extractvalue {i32, i32, i32} %0, 2 + + ; CHECK: movl %eax, 16(%esp) + ; CHECK-NEXT: movl $2, 12(%esp) + ; CHECK-NEXT: movl $1, 8(%esp) + ; CHECK: calll foo + tail call void @foo(i32 undef, i32 undef, i32 1, i32 2, i32 %res) nounwind + ret void +} + +define cc 11 {i32, i32, i32} @addfour(i32 %hp, i32 %p, i32 %x, i32 %y, i32 %z) nounwind { +entry: + ; CHECK: addl %edx, %eax + ; CHECK-NEXT: addl %ecx, %eax + %0 = add i32 %x, %y + %1 = add i32 %0, %z + + ; CHECK: ret + %res = insertvalue {i32, i32, i32} undef, i32 %1, 2 + ret {i32, i32, i32} %res +} + +define cc 11 void @foo(i32 %hp, i32 %p, i32 %arg0, i32 %arg1, i32 %arg2) nounwind { +entry: + ; CHECK: movl %esi, 16(%esp) + ; CHECK-NEXT: movl %ebp, 12(%esp) + ; CHECK-NEXT: movl %eax, 8(%esp) + ; CHECK-NEXT: movl %edx, 4(%esp) + ; CHECK-NEXT: movl %ecx, (%esp) + %hp_var = alloca i32 + %p_var = alloca i32 + %arg0_var = alloca i32 + 
%arg1_var = alloca i32 + %arg2_var = alloca i32 + store i32 %hp, i32* %hp_var + store i32 %p, i32* %p_var + store i32 %arg0, i32* %arg0_var + store i32 %arg1, i32* %arg1_var + store i32 %arg2, i32* %arg2_var + + ; CHECK: movl 4(%esp), %edx + ; CHECK-NEXT: movl 8(%esp), %eax + ; CHECK-NEXT: movl 12(%esp), %ebp + ; CHECK-NEXT: movl 16(%esp), %esi + %0 = load i32* %hp_var + %1 = load i32* %p_var + %2 = load i32* %arg0_var + %3 = load i32* %arg1_var + %4 = load i32* %arg2_var + ; CHECK: jmp bar + tail call cc 11 void @bar(i32 %0, i32 %1, i32 %2, i32 %3, i32 %4) nounwind + ret void +} + +define cc 11 void @baz() nounwind { + %tmp_clos = load i32* @clos + %tmp_clos2 = inttoptr i32 %tmp_clos to i32* + %indirect_call = bitcast i32* %tmp_clos2 to void (i32, i32, i32)* + ; CHECK: movl $42, %eax + ; CHECK-NEXT: jmpl *clos + tail call cc 11 void %indirect_call(i32 undef, i32 undef, i32 42) nounwind + ret void +} + +@clos = external constant i32 +declare cc 11 void @bar(i32, i32, i32, i32, i32) diff --git a/test/CodeGen/X86/hipe-cc64.ll b/test/CodeGen/X86/hipe-cc64.ll new file mode 100644 index 00000000000..6354a4c1ecb --- /dev/null +++ b/test/CodeGen/X86/hipe-cc64.ll @@ -0,0 +1,87 @@ +; RUN: llc < %s -tailcallopt -code-model=medium -stack-alignment=8 -mtriple=x86_64-linux-gnu | FileCheck %s + +; Check the HiPE calling convention works (x86-64) + +define void @zap(i64 %a, i64 %b) nounwind { +entry: + ; CHECK: movq %rsi, %rax + ; CHECK-NEXT: movq %rdi, %rsi + ; CHECK-NEXT: movq %rax, %rdx + ; CHECK-NEXT: movl $8, %ecx + ; CHECK-NEXT: movl $9, %r8d + ; CHECK-NEXT: callq addfour + %0 = call cc 11 {i64, i64, i64} @addfour(i64 undef, i64 undef, i64 %a, i64 %b, i64 8, i64 9) + %res = extractvalue {i64, i64, i64} %0, 2 + + ; CHECK: movl $1, %edx + ; CHECK-NEXT: movl $2, %ecx + ; CHECK-NEXT: movl $3, %r8d + ; CHECK-NEXT: movq %rax, %r9 + ; CHECK: callq foo + tail call void @foo(i64 undef, i64 undef, i64 1, i64 2, i64 3, i64 %res) nounwind + ret void +} + +define cc 11 {i64, i64, i64} 
@addfour(i64 %hp, i64 %p, i64 %x, i64 %y, i64 %z, i64 %w) nounwind { +entry: + ; CHECK: leaq (%rsi,%rdx), %rax + ; CHECK-NEXT: addq %rcx, %rax + ; CHECK-NEXT: addq %r8, %rax + %0 = add i64 %x, %y + %1 = add i64 %0, %z + %2 = add i64 %1, %w + + ; CHECK: ret + %res = insertvalue {i64, i64, i64} undef, i64 %2, 2 + ret {i64, i64, i64} %res +} + +define cc 11 void @foo(i64 %hp, i64 %p, i64 %arg0, i64 %arg1, i64 %arg2, i64 %arg3) nounwind { +entry: + ; CHECK: movq %r15, 40(%rsp) + ; CHECK-NEXT: movq %rbp, 32(%rsp) + ; CHECK-NEXT: movq %rsi, 24(%rsp) + ; CHECK-NEXT: movq %rdx, 16(%rsp) + ; CHECK-NEXT: movq %rcx, 8(%rsp) + ; CHECK-NEXT: movq %r8, (%rsp) + %hp_var = alloca i64 + %p_var = alloca i64 + %arg0_var = alloca i64 + %arg1_var = alloca i64 + %arg2_var = alloca i64 + %arg3_var = alloca i64 + store i64 %hp, i64* %hp_var + store i64 %p, i64* %p_var + store i64 %arg0, i64* %arg0_var + store i64 %arg1, i64* %arg1_var + store i64 %arg2, i64* %arg2_var + store i64 %arg3, i64* %arg3_var + + ; CHECK: movq 8(%rsp), %rcx + ; CHECK-NEXT: movq 16(%rsp), %rdx + ; CHECK-NEXT: movq 24(%rsp), %rsi + ; CHECK-NEXT: movq 32(%rsp), %rbp + ; CHECK-NEXT: movq 40(%rsp), %r15 + %0 = load i64* %hp_var + %1 = load i64* %p_var + %2 = load i64* %arg0_var + %3 = load i64* %arg1_var + %4 = load i64* %arg2_var + %5 = load i64* %arg3_var + ; CHECK: jmp bar + tail call cc 11 void @bar(i64 %0, i64 %1, i64 %2, i64 %3, i64 %4, i64 %5) nounwind + ret void +} + +define cc 11 void @baz() nounwind { + %tmp_clos = load i64* @clos + %tmp_clos2 = inttoptr i64 %tmp_clos to i64* + %indirect_call = bitcast i64* %tmp_clos2 to void (i64, i64, i64)* + ; CHECK: movl $42, %esi + ; CHECK-NEXT: jmpq *(%rax) + tail call cc 11 void %indirect_call(i64 undef, i64 undef, i64 42) nounwind + ret void +} + +@clos = external constant i64 +declare cc 11 void @bar(i64, i64, i64, i64, i64, i64)