[Orc] Rename OrcArchitectureSupport to OrcABISupport and add Win32 ABI support.

This enables lazy JITing on Windows x86-64.

Patch by David. Thanks David!



git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@268845 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Lang Hames 2016-05-07 03:36:38 +00:00
parent cc8923b4e4
commit c14b8e9703
7 changed files with 233 additions and 110 deletions

View File

@ -4,7 +4,7 @@
#include "llvm/ExecutionEngine/Orc/LambdaResolver.h"
#include "llvm/ExecutionEngine/Orc/LazyEmittingLayer.h"
#include "llvm/ExecutionEngine/Orc/ObjectLinkingLayer.h"
#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
@ -1309,7 +1309,7 @@ private:
std::map<std::string, std::unique_ptr<FunctionAST>> FunctionDefs;
LocalJITCompileCallbackManager<OrcX86_64> CompileCallbacks;
LocalJITCompileCallbackManager<OrcX86_64_SysV> CompileCallbacks;
};
static void HandleDefinition(SessionContext &S, KaleidoscopeJIT &J) {

View File

@ -1,4 +1,4 @@
//===-- OrcArchitectureSupport.h - Architecture support code ---*- C++ -*-===//
//===-------------- OrcABISupport.h - ABI support code ---------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@ -7,16 +7,16 @@
//
//===----------------------------------------------------------------------===//
//
// Architecture specific code for Orc, e.g. callback assembly.
// ABI specific code for Orc, e.g. callback assembly.
//
// Architecture classes should be part of the JIT *target* process, not the host
// ABI classes should be part of the JIT *target* process, not the host
// process (except where you're doing hosted JITing and the two are one and the
// same).
//
//===----------------------------------------------------------------------===//
#ifndef LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H
#define LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H
#ifndef LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H
#define LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H
#include "IndirectionUtils.h"
#include "llvm/Support/Memory.h"
@ -25,13 +25,13 @@
namespace llvm {
namespace orc {
/// Generic ORC Architecture support.
/// Generic ORC ABI support.
///
/// This class can be substituted as the target architecture support class for
/// ORC templates that require one (e.g. IndirectStubsManagers). It does not
/// support lazy JITing however, and any attempt to use that functionality
/// will result in execution of an llvm_unreachable.
class OrcGenericArchitecture {
class OrcGenericABI {
public:
static const unsigned PointerSize = sizeof(uintptr_t);
static const unsigned TrampolineSize = 1;
@ -138,24 +138,16 @@ public:
unsigned MinStubs, void *InitialPtrVal);
};
/// @brief X86_64 support.
/// @brief X86_64 code that's common to all ABIs.
///
/// X86_64 supports lazy JITing.
class OrcX86_64 {
class OrcX86_64_Base {
public:
static const unsigned PointerSize = 8;
static const unsigned TrampolineSize = 8;
static const unsigned ResolverCodeSize = 0x6C;
typedef GenericIndirectStubsInfo<8> IndirectStubsInfo;
typedef TargetAddress (*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is
/// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
/// @brief Write the requested number of trampolines into the given memory,
/// which must be big enough to hold 1 pointer, plus NumTrampolines
/// trampolines.
@ -172,6 +164,34 @@ public:
unsigned MinStubs, void *InitialPtrVal);
};
/// @brief X86_64 support for SysV ABI (Linux, MacOSX).
///
/// X86_64_SysV supports lazy JITing.
class OrcX86_64_SysV : public OrcX86_64_Base {
public:
static const unsigned ResolverCodeSize = 0x6C;
typedef TargetAddress(*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is be
/// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
};
/// @brief X86_64 support for Win32.
///
/// X86_64_Win32 supports lazy JITing.
class OrcX86_64_Win32 : public OrcX86_64_Base {
public:
static const unsigned ResolverCodeSize = 0x74;
typedef TargetAddress(*JITReentryFn)(void *CallbackMgr, void *TrampolineId);
/// @brief Write the resolver code into the given memory. The user is be
/// responsible for allocating the memory and setting permissions.
static void writeResolverCode(uint8_t *ResolveMem, JITReentryFn Reentry,
void *CallbackMgr);
};
/// @brief I386 support.
///
/// I386 supports lazy JITing.
@ -209,4 +229,4 @@ public:
} // End namespace orc.
} // End namespace llvm.
#endif // LLVM_EXECUTIONENGINE_ORC_ORCARCHITECTURESUPPORT_H
#endif // LLVM_EXECUTIONENGINE_ORC_ORCABISUPPORT_H

View File

@ -2,7 +2,7 @@ add_llvm_library(LLVMOrcJIT
ExecutionUtils.cpp
IndirectionUtils.cpp
NullResolver.cpp
OrcArchitectureSupport.cpp
OrcABISupport.cpp
OrcCBindings.cpp
OrcCBindingsStack.cpp
OrcError.cpp

View File

@ -1,4 +1,4 @@
//===------ OrcArchSupport.cpp - Architecture specific support code -------===//
//===------------- OrcABISupport.cpp - ABI specific support code ----------===//
//
// The LLVM Compiler Infrastructure
//
@ -7,7 +7,7 @@
//
//===----------------------------------------------------------------------===//
#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Support/Process.h"
@ -190,7 +190,89 @@ Error OrcAArch64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
return Error::success();
}
void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
/// @brief Write NumTrampolines trampolines into TrampolineMem, followed by a
///        copy of ResolverAddr.
///
/// Resulting layout (each trampoline occupies TrampolineSize bytes):
///
///   [trampoline 0][trampoline 1] ... [trampoline N-1][ResolverAddr]
///
/// Every trampoline is the 64-bit pattern CallIndirPCRel with a PC-relative
/// displacement patched in so that it refers to the ResolverAddr slot stored
/// after the last trampoline. TrampolineMem must therefore have room for
/// NumTrampolines trampolines plus one pointer.
///
/// @param TrampolineMem  Destination memory for the trampolines.
/// @param ResolverAddr   Address stored in the slot after the trampolines.
/// @param NumTrampolines Number of trampolines to emit.
void OrcX86_64_Base::writeTrampolines(uint8_t *TrampolineMem,
                                      void *ResolverAddr,
                                      unsigned NumTrampolines) {
  // Byte offset from the start of the block to the resolver pointer slot.
  unsigned OffsetToPtr = NumTrampolines * TrampolineSize;

  memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *));

  uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineMem);
  uint64_t CallIndirPCRel = 0xf1c40000000015ff;

  // The displacement is measured from the end of the 6-byte call instruction,
  // hence the "- 6". Widen to 64 bits *before* shifting: shifting in 32-bit
  // unsigned would drop the high bits of the displacement once the offset
  // reaches 0x10000. This mirrors the cast used for PtrOffsetField in
  // emitIndirectStubsBlock.
  for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
    Trampolines[I] =
        CallIndirPCRel | (static_cast<uint64_t>(OffsetToPtr - 6) << 16);
}
/// @brief Allocate and populate a block of indirect stubs together with the
///        pointer slots they jump through.
///
/// Computes how many pages are needed to hold at least MinStubs stubs, maps
/// twice that many pages read/write in a single allocation, fills the first
/// half with stubs, re-protects that half as read/execute, and sets every
/// pointer slot in the second half to InitialPtrVal. On success the mapping is
/// moved into StubsInfo.
///
/// @param StubsInfo     Receives the new stubs block (stub count + memory).
/// @param MinStubs      Minimum number of stubs to emit; the actual count is
///                      rounded up to fill the allocated pages.
/// @param InitialPtrVal Value written to every stub pointer slot.
/// @return Error::success(), or the allocation / protection error code
///         converted with errorCodeToError.
Error OrcX86_64_Base::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
unsigned MinStubs,
void *InitialPtrVal) {
// Stub format is:
//
// .section __orc_stubs
// stub1:
// jmpq *ptr1(%rip)
// .byte 0xC4 ; <- Invalid opcode padding.
// .byte 0xF1
// stub2:
// jmpq *ptr2(%rip)
//
// ...
//
// .section __orc_ptrs
// ptr1:
// .quad 0x0
// ptr2:
// .quad 0x0
//
// ...
const unsigned StubSize = IndirectStubsInfo::StubSize;
// Emit at least MinStubs, rounded up to fill the pages allocated.
unsigned PageSize = sys::Process::getPageSize();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
// Allocate memory for stubs and pointers in one call: the first
// NumPages pages hold the stubs, the second NumPages pages hold the pointers.
std::error_code EC;
auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
2 * NumPages * PageSize, nullptr,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
if (EC)
return errorCodeToError(EC);
// Create separate MemoryBlocks representing the stubs and pointers.
sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
NumPages * PageSize,
NumPages * PageSize);
// Populate the stubs pages and mark them executable. Every stub gets the
// same displacement: the pointers block starts NumPages * PageSize bytes
// after the stubs block, and 6 is subtracted from that distance before it is
// shifted into the displacement field of the stub pattern.
// NOTE(review): the 6 is presumably the length of the jmpq instruction
// (RIP-relative displacements count from the next instruction) -- confirm.
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlock.base());
uint64_t PtrOffsetField = static_cast<uint64_t>(NumPages * PageSize - 6)
<< 16;
for (unsigned I = 0; I < NumStubs; ++I)
Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
// The stubs must be written before this point: afterwards the stubs block is
// read/execute only.
if (auto EC = sys::Memory::protectMappedMemory(
StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
return errorCodeToError(EC);
// Initialize all pointers to InitialPtrVal.
void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
for (unsigned I = 0; I < NumStubs; ++I)
Ptr[I] = InitialPtrVal;
// Hand the mapping (stubs + pointers) over to the caller's StubsInfo.
StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
return Error::success();
}
void OrcX86_64_SysV::writeResolverCode(uint8_t *ResolverMem,
JITReentryFn ReentryFn,
void *CallbackMgr) {
const uint8_t ResolverCode[] = {
@ -256,84 +338,80 @@ void OrcX86_64::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,
sizeof(CallbackMgr));
}
void OrcX86_64::writeTrampolines(uint8_t *TrampolineMem, void *ResolverAddr,
unsigned NumTrampolines) {
void OrcX86_64_Win32::writeResolverCode(uint8_t *ResolverMem,
JITReentryFn ReentryFn,
void *CallbackMgr) {
unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
// ResolverCode is similar to the OrcX86_64_SysV version, with differences specific to the Windows x64 calling convention:
// arguments go into rcx and rdx (and come in reverse order), and shadow space is allocated on the stack.
const uint8_t ResolverCode[] = {
// resolver_entry:
0x55, // 0x00: pushq %rbp
0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
0x50, // 0x04: pushq %rax
0x53, // 0x05: pushq %rbx
0x51, // 0x06: pushq %rcx
0x52, // 0x07: pushq %rdx
0x56, // 0x08: pushq %rsi
0x57, // 0x09: pushq %rdi
0x41, 0x50, // 0x0a: pushq %r8
0x41, 0x51, // 0x0c: pushq %r9
0x41, 0x52, // 0x0e: pushq %r10
0x41, 0x53, // 0x10: pushq %r11
0x41, 0x54, // 0x12: pushq %r12
0x41, 0x55, // 0x14: pushq %r13
0x41, 0x56, // 0x16: pushq %r14
0x41, 0x57, // 0x18: pushq %r15
0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp
0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
memcpy(TrampolineMem + OffsetToPtr, &ResolverAddr, sizeof(void *));
0x48, 0xb9, // 0x26: movabsq <CBMgr>, %rcx
// 0x28: Callback manager addr.
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineMem);
uint64_t CallIndirPCRel = 0xf1c40000000015ff;
0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8]
0x48, 0x83, 0xea, 0x06, // 0x34: sub rdx, 0x6
for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
}
0x48, 0xb8, // 0x38: movabsq <REntry>, %rax
// 0x3a: JIT re-entry fn addr:
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
Error OrcX86_64::emitIndirectStubsBlock(IndirectStubsInfo &StubsInfo,
unsigned MinStubs,
void *InitialPtrVal) {
// Stub format is:
//
// .section __orc_stubs
// stub1:
// jmpq *ptr1(%rip)
// .byte 0xC4 ; <- Invalid opcode padding.
// .byte 0xF1
// stub2:
// jmpq *ptr2(%rip)
//
// ...
//
// .section __orc_ptrs
// ptr1:
// .quad 0x0
// ptr2:
// .quad 0x0
//
// ...
// 0x42: sub rsp, 0x20 (Allocate shadow space)
0x48, 0x83, 0xEC, 0x20,
0xff, 0xd0, // 0x46: callq *%rax
const unsigned StubSize = IndirectStubsInfo::StubSize;
// 0x48: add rsp, 0x20 (Free shadow space)
0x48, 0x83, 0xC4, 0x20,
// Emit at least MinStubs, rounded up to fill the pages allocated.
unsigned PageSize = sys::Process::getPageSize();
unsigned NumPages = ((MinStubs * StubSize) + (PageSize - 1)) / PageSize;
unsigned NumStubs = (NumPages * PageSize) / StubSize;
0x48, 0x89, 0x45, 0x08, // 0x4C: movq %rax, 8(%rbp)
0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x50: fxrstor64 (%rsp)
0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x55: addq 0x208, %rsp
0x41, 0x5f, // 0x5C: popq %r15
0x41, 0x5e, // 0x5E: popq %r14
0x41, 0x5d, // 0x60: popq %r13
0x41, 0x5c, // 0x62: popq %r12
0x41, 0x5b, // 0x64: popq %r11
0x41, 0x5a, // 0x66: popq %r10
0x41, 0x59, // 0x68: popq %r9
0x41, 0x58, // 0x6a: popq %r8
0x5f, // 0x6c: popq %rdi
0x5e, // 0x6d: popq %rsi
0x5a, // 0x6e: popq %rdx
0x59, // 0x6f: popq %rcx
0x5b, // 0x70: popq %rbx
0x58, // 0x71: popq %rax
0x5d, // 0x72: popq %rbp
0xc3, // 0x73: retq
};
// Allocate memory for stubs and pointers in one call.
std::error_code EC;
auto StubsMem = sys::OwningMemoryBlock(sys::Memory::allocateMappedMemory(
2 * NumPages * PageSize, nullptr,
sys::Memory::MF_READ | sys::Memory::MF_WRITE, EC));
if (EC)
return errorCodeToError(EC);
const unsigned ReentryFnAddrOffset = 0x3a;
const unsigned CallbackMgrAddrOffset = 0x28;
// Create separate MemoryBlocks representing the stubs and pointers.
sys::MemoryBlock StubsBlock(StubsMem.base(), NumPages * PageSize);
sys::MemoryBlock PtrsBlock(static_cast<char *>(StubsMem.base()) +
NumPages * PageSize,
NumPages * PageSize);
// Populate the stubs page stubs and mark it executable.
uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlock.base());
uint64_t PtrOffsetField = static_cast<uint64_t>(NumPages * PageSize - 6)
<< 16;
for (unsigned I = 0; I < NumStubs; ++I)
Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
if (auto EC = sys::Memory::protectMappedMemory(
StubsBlock, sys::Memory::MF_READ | sys::Memory::MF_EXEC))
return errorCodeToError(EC);
// Initialize all pointers to point at FailureAddress.
void **Ptr = reinterpret_cast<void **>(PtrsBlock.base());
for (unsigned I = 0; I < NumStubs; ++I)
Ptr[I] = InitialPtrVal;
StubsInfo = IndirectStubsInfo(NumStubs, std::move(StubsMem));
return Error::success();
memcpy(ResolverMem, ResolverCode, sizeof(ResolverCode));
memcpy(ResolverMem + ReentryFnAddrOffset, &ReentryFn, sizeof(ReentryFn));
memcpy(ResolverMem + CallbackMgrAddrOffset, &CallbackMgr,
sizeof(CallbackMgr));
}
void OrcI386::writeResolverCode(uint8_t *ResolverMem, JITReentryFn ReentryFn,

View File

@ -9,7 +9,7 @@
#include "OrcCBindingsStack.h"
#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include <cstdio>
@ -29,8 +29,13 @@ OrcCBindingsStack::createCompileCallbackMgr(Triple T) {
};
case Triple::x86_64: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
if ( T.getOS() == Triple::OSType::Win32 ) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
return llvm::make_unique<CCMgrT>(0);
} else {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_SysV> CCMgrT;
return llvm::make_unique<CCMgrT>(0);
}
}
}
}
@ -47,9 +52,16 @@ OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) {
};
case Triple::x86_64:
return []() {
if (T.getOS() == Triple::OSType::Win32) {
return [](){
return llvm::make_unique<
orc::LocalIndirectStubsManager<orc::OrcX86_64>>();
orc::LocalIndirectStubsManager<orc::OrcX86_64_Win32>>();
};
} else {
return [](){
return llvm::make_unique<
orc::LocalIndirectStubsManager<orc::OrcX86_64_SysV>>();
};
}
}
}

View File

@ -1,4 +1,4 @@
#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/ExecutionEngine/Orc/OrcRemoteTargetServer.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
@ -12,7 +12,7 @@ using namespace llvm::orc;
using namespace llvm::sys;
#ifdef __x86_64__
typedef OrcX86_64 HostOrcArch;
typedef OrcX86_64_SysV HostOrcArch;
#else
typedef OrcGenericArchitecture HostOrcArch;
#endif

View File

@ -8,7 +8,7 @@
//===----------------------------------------------------------------------===//
#include "OrcLazyJIT.h"
#include "llvm/ExecutionEngine/Orc/OrcArchitectureSupport.h"
#include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/DynamicLibrary.h"
#include <cstdio>
@ -57,8 +57,13 @@ OrcLazyJIT::createCompileCallbackMgr(Triple T) {
}
case Triple::x86_64: {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64> CCMgrT;
if ( T.getOS() == Triple::OSType::Win32 ) {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_Win32> CCMgrT;
return llvm::make_unique<CCMgrT>(0);
} else {
typedef orc::LocalJITCompileCallbackManager<orc::OrcX86_64_SysV> CCMgrT;
return llvm::make_unique<CCMgrT>(0);
}
}
}
}
@ -75,10 +80,17 @@ OrcLazyJIT::createIndirectStubsMgrBuilder(Triple T) {
};
case Triple::x86_64:
if (T.getOS() == Triple::OSType::Win32) {
return [](){
return llvm::make_unique<
orc::LocalIndirectStubsManager<orc::OrcX86_64>>();
orc::LocalIndirectStubsManager<orc::OrcX86_64_Win32>>();
};
} else {
return [](){
return llvm::make_unique<
orc::LocalIndirectStubsManager<orc::OrcX86_64_SysV>>();
};
}
}
}
@ -192,3 +204,4 @@ int llvm::runOrcLazyJIT(std::unique_ptr<Module> M, int ArgC, char* ArgV[]) {
auto Main = fromTargetAddress<MainFnPtr>(MainSym.getAddress());
return Main(ArgC, ArgV);
}