diff --git a/include/llvm/CallingConv.h b/include/llvm/CallingConv.h
index bb76defc266..8b6a01a0cf5 100644
--- a/include/llvm/CallingConv.h
+++ b/include/llvm/CallingConv.h
@@ -57,7 +57,12 @@ namespace CallingConv {
     /// X86_FastCall - 'fast' analog of X86_StdCall. Passes first two arguments
     /// in ECX:EDX registers, others - via stack. Callee is responsible for
     /// stack cleaning.
-    X86_FastCall = 65
+    X86_FastCall = 65,
+
+    /// X86_Ocaml - This is a weird ABI used by Objective Caml. Formally, it
+    /// supports only one to six integer/address arguments, all in-reg. It also
+    /// supports tail call emission.
+    X86_Ocaml = 66
   };
 } // End CallingConv namespace
 
diff --git a/include/llvm/CodeGen/LinkAllCodegenComponents.h b/include/llvm/CodeGen/LinkAllCodegenComponents.h
index 48be5411bcd..ac580355939 100644
--- a/include/llvm/CodeGen/LinkAllCodegenComponents.h
+++ b/include/llvm/CodeGen/LinkAllCodegenComponents.h
@@ -36,6 +36,7 @@ namespace {
 
       (void) llvm::createSimpleRegisterCoalescer();
 
+      (void) llvm::createOcamlCollector();
       (void) llvm::createShadowStackCollector();
 
       (void) llvm::createBURRListDAGScheduler(NULL, NULL, NULL);
diff --git a/lib/CodeGen/OcamlCollector.cpp b/lib/CodeGen/OcamlCollector.cpp
new file mode 100644
index 00000000000..8848fe13716
--- /dev/null
+++ b/lib/CodeGen/OcamlCollector.cpp
@@ -0,0 +1,198 @@
+//===-- OcamlCollector.cpp - Ocaml frametable emitter ---------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements lowering for the llvm.gc* intrinsics compatible with
+// Objective Caml 3.10.0, which uses a liveness-accurate static stack map.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/Collectors.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/Collector.h"
+#include "llvm/CodeGen/CollectorMetadata.h"
+#include "llvm/Function.h"
+#include "llvm/Module.h"
+#include "llvm/PassManager.h"
+#include "llvm/Support/Compiler.h"
+#include "llvm/Target/TargetAsmInfo.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include <ctype.h>
+
+using namespace llvm;
+
+namespace {
+
+  /// OcamlCollector - Collector registered below under the name "ocaml". Its
+  /// constructor sets NeededSafePoints to the GC::PostCall bit, and its two
+  /// assembly hooks emit the caml* labels and the frametable.
+  class VISIBILITY_HIDDEN OcamlCollector : public Collector {
+  public:
+    OcamlCollector();
+
+    void beginAssembly(std::ostream &OS, AsmPrinter &AP,
+                       const TargetAsmInfo &TAI);
+
+    void finishAssembly(std::ostream &OS, AsmPrinter &AP,
+                        const TargetAsmInfo &TAI);
+  };
+
+  CollectorRegistry::Add<OcamlCollector>
+  X("ocaml", "ocaml 3.10-compatible collector");
+
+}
+
+// -----------------------------------------------------------------------------
+
+/// EmitCamlGlobal - Emit a label named
+///   <global prefix> "caml" <Module> "__" <Id>
+/// where <Module> is the module identifier truncated at its first '.', with
+/// its first character upper-cased. The label is preceded by the target's
+/// global directive when the target provides one.
+static void EmitCamlGlobal(const Module &M, std::ostream &OS, AsmPrinter &AP,
+                           const TargetAsmInfo &TAI, const char *Id) {
+  const std::string &MId = M.getModuleIdentifier();
+
+  std::string Mangled;
+  Mangled += TAI.getGlobalPrefix();
+  Mangled += "caml";
+  size_t Letter = Mangled.size();
+  Mangled.append(MId.begin(), std::find(MId.begin(), MId.end(), '.'));
+  Mangled += "__";
+  Mangled += Id;
+
+  // Capitalize the first letter of the module name.
+  Mangled[Letter] = toupper(Mangled[Letter]);
+
+  if (const char *GlobalDirective = TAI.getGlobalDirective())
+    OS << GlobalDirective << Mangled << "\n";
+  OS << Mangled << ":\n";
+}
+
+/// createOcamlCollector - Allocate a new OcamlCollector and return the raw
+/// pointer to it.
+Collector *llvm::createOcamlCollector() {
+  return new OcamlCollector();
+}
+
+/// The only safe-point kind requested is GC::PostCall.
+OcamlCollector::OcamlCollector() {
+  NeededSafePoints = 1 << GC::PostCall;
+}
+
+/// beginAssembly - Emit the caml<Module>__code_begin label in the text section
+/// and the caml<Module>__data_begin label in the data section.
+void OcamlCollector::beginAssembly(std::ostream &OS, AsmPrinter &AP,
+                                   const TargetAsmInfo &TAI) {
+  AP.SwitchToTextSection(TAI.getTextSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "code_begin");
+
+  AP.SwitchToDataSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "data_begin");
+}
+
+/// finishAssembly - Emit the code_end and data_end labels, then print the
+/// frametable. The ocaml frametable format is thus:
+///
+///   extern "C" struct align(sizeof(intptr_t)) {
+///     uint16_t NumDescriptors;
+///     struct align(sizeof(intptr_t)) {
+///       void *ReturnAddress;
+///       uint16_t FrameSize;
+///       uint16_t NumLiveOffsets;
+///       uint16_t LiveOffsets[NumLiveOffsets];
+///     } Descriptors[NumDescriptors];
+///   } caml${module}__frametable;
+///
+/// Note that this precludes programs from stack frames larger than 64K
+/// (FrameSize and LiveOffsets would overflow). finishAssembly will abort if
+/// either condition is detected in a function which uses the collector.
+///
+/// NOTE(review): the layout above starts with NumDescriptors, but nothing is
+/// emitted between the frametable label and the first descriptor below.
+/// Confirm against the ocaml runtime whether a count must be emitted.
+///
+void OcamlCollector::finishAssembly(std::ostream &OS, AsmPrinter &AP,
+                                    const TargetAsmInfo &TAI) {
+  // Pick the pointer-sized data directive and the matching alignment exponent.
+  const char *AddressDirective;
+  int AddressAlignLog;
+  if (AP.TM.getTargetData()->getPointerSize() == sizeof(int32_t)) {
+    AddressDirective = TAI.getData32bitsDirective();
+    AddressAlignLog = 2;
+  } else {
+    AddressDirective = TAI.getData64bitsDirective();
+    AddressAlignLog = 3;
+  }
+
+  AP.SwitchToTextSection(TAI.getTextSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "code_end");
+
+  AP.SwitchToDataSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "data_end");
+
+  OS << AddressDirective << 0; // FIXME: Why does ocaml emit this??
+  AP.EOL();
+
+  AP.SwitchToDataSection(TAI.getDataSection());
+  EmitCamlGlobal(getModule(), OS, AP, TAI, "frametable");
+
+  for (iterator FI = begin(), FE = end(); FI != FE; ++FI) {
+    CollectorMetadata &MD = **FI;
+
+    OS << "\t" << TAI.getCommentString() << " live roots for "
+       << MD.getFunction().getNameStart() << "\n";
+
+    for (CollectorMetadata::iterator PI = MD.begin(),
+                                     PE = MD.end(); PI != PE; ++PI) {
+
+      // FrameSize is written with EmitInt16 below, so it must be < 1<<16.
+      uint64_t FrameSize = MD.getFrameSize();
+      if (FrameSize >= 1<<16) {
+        cerr << "Function '" << MD.getFunction().getNameStart()
+             << "' is too large for the ocaml collector! "
+             << "Frame size " << FrameSize << " >= 65536.\n";
+        abort(); // Very rude!
+      }
+
+      // LiveCount is likewise written with EmitInt16.
+      size_t LiveCount = MD.live_size(PI);
+      if (LiveCount >= 1<<16) {
+        cerr << "Function '" << MD.getFunction().getNameStart()
+             << "' is too large for the ocaml collector! "
+             << "Live root count " << LiveCount << " >= 65536.\n";
+        abort(); // Very rude!
+      }
+
+      OS << AddressDirective
+         << TAI.getPrivateGlobalPrefix() << "label" << PI->Num;
+      AP.EOL("call return address");
+
+      AP.EmitInt16(FrameSize);
+      AP.EOL("stack frame size");
+
+      AP.EmitInt16(LiveCount);
+      AP.EOL("live root count");
+
+      for (CollectorMetadata::live_iterator LI = MD.live_begin(PI),
+                                            LE = MD.live_end(PI);
+                                            LI != LE; ++LI) {
+        assert(LI->StackOffset < 1<<16 &&
+               "GC root stack offset is outside of fixed stack frame and out "
+               "of range for Ocaml collector!");
+
+        OS << "\t.word\t" << LI->StackOffset;
+        AP.EOL("stack offset");
+      }
+
+      AP.EmitAlignment(AddressAlignLog);
+    }
+  }
+}
diff --git a/test/CodeGen/Generic/GC/frame_size.ll b/test/CodeGen/Generic/GC/frame_size.ll
new file mode 100644
index 00000000000..75626c18c5b
--- /dev/null
+++ b/test/CodeGen/Generic/GC/frame_size.ll
@@ -0,0 +1,14 @@
+; RUN: llvm-as < %s | llc -asm-verbose | grep {frame size} | grep -v 0x0
+
+declare void @llvm.gcroot(i8** %value, i8* %tag)
+declare void @g() gc "ocaml"
+
+define void @f(i8* %arg.0, void()* %arg.1) gc "ocaml" {
+entry:
+	%gcroot.0 = alloca i8*
+	call void @llvm.gcroot(i8** %gcroot.0, i8* null)
+	store i8* %arg.0, i8** %gcroot.0
+	call void @g()
+	call void %arg.1()
+	ret void
+}
diff --git a/test/CodeGen/Generic/GC/simple_ocaml.ll b/test/CodeGen/Generic/GC/simple_ocaml.ll
new file mode 100644
index 00000000000..a33e0351f7f
--- /dev/null
+++ b/test/CodeGen/Generic/GC/simple_ocaml.ll
@@ -0,0 +1,42 @@
+; RUN: llvm-as < %s | llc | grep caml.*__frametable
+; RUN: llvm-as < %s | llc -march=x86 | grep {movl .0}
+
+%struct.obj = type { i8*, %struct.obj* }
+
+define %struct.obj* @fun(%struct.obj* %head) gc "ocaml" {
+entry:
+	%gcroot.0 = alloca i8*
+	%gcroot.1 = alloca i8*
+
+	call void @llvm.gcroot(i8** %gcroot.0, i8* null)
+	call void @llvm.gcroot(i8** %gcroot.1, i8* null)
+
+	%local.0 = bitcast i8** %gcroot.0 to %struct.obj**
+	%local.1 = bitcast i8** %gcroot.1 to %struct.obj**
+
+	store %struct.obj* %head, %struct.obj** %local.0
+	br label %bb.loop
+bb.loop:
+	%t0 = load %struct.obj** %local.0
+	%t1 = getelementptr %struct.obj* %t0, i32 0, i32 1
+	%t2 = bitcast %struct.obj* %t0 to i8*
+	%t3 = bitcast %struct.obj** %t1 to i8**
+	%t4 = call i8* @llvm.gcread(i8* %t2, i8** %t3)
+	%t5 = bitcast i8* %t4 to %struct.obj*
+	%t6 = icmp eq %struct.obj* %t5, null
+	br i1 %t6, label %bb.loop, label %bb.end
+bb.end:
+	%t7 = malloc %struct.obj
+	store %struct.obj* %t7, %struct.obj** %local.1
+	%t8 = bitcast %struct.obj* %t7 to i8*
+	%t9 = load %struct.obj** %local.0
+	%t10 = getelementptr %struct.obj* %t9, i32 0, i32 1
+	%t11 = bitcast %struct.obj* %t9 to i8*
+	%t12 = bitcast %struct.obj** %t10 to i8**
+	call void @llvm.gcwrite(i8* %t8, i8* %t11, i8** %t12)
+	ret %struct.obj* %t7
+}
+
+declare void @llvm.gcroot(i8** %value, i8* %tag)
+declare void @llvm.gcwrite(i8* %value, i8* %obj, i8** %field)
+declare i8* @llvm.gcread(i8* %obj, i8** %field)