Merge pull request #3868 from bylaws/arm64ec-oldnew

ARM64EC frontend
This commit is contained in:
Ryan Houdek 2024-07-15 09:54:56 -07:00 committed by GitHub
commit 6df51a57b3
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
11 changed files with 929 additions and 4 deletions

View File

@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.14)
project(FEX)
project(FEX C CXX ASM)
INCLUDE (CheckIncludeFiles)
CHECK_INCLUDE_FILES ("gdb/jit-reader.h" HAVE_GDB_JIT_READER_H)

2
External/jemalloc vendored

@ -1 +1 @@
Subproject commit 569545241370457e2d14b0458b0ae9261491ea83
Subproject commit f3149a8c3b6aaa523befdd09e34064fbad949768

View File

@ -0,0 +1,21 @@
// SPDX-License-Identifier: MIT
#pragma once
#include <windef.h>
#include <ntstatus.h>
#include <winternl.h>
// Entry points of the ARM64EC binary-translation module. All are declared with C linkage and the
// STDMETHODCALLTYPE calling convention; this header only declares them.
extern "C" {
// Process-wide setup and teardown.
void STDMETHODCALLTYPE ProcessInit();
void STDMETHODCALLTYPE ProcessTerm();
// Per-thread setup and teardown. ThreadInit takes no handle; ThreadTerm is given the thread to tear down.
NTSTATUS STDMETHODCALLTYPE ThreadInit();
NTSTATUS STDMETHODCALLTYPE ThreadTerm(HANDLE Thread);
// Exception hook: receives the exception pointers and the ARM64 context, and reports through *Continue.
NTSTATUS STDMETHODCALLTYPE ResetToConsistentState(EXCEPTION_POINTERS* Ptrs, ARM64_NT_CONTEXT* Context, BOOLEAN* Continue);
// Notifications about instruction-cache flushes and changes to the address space.
void STDMETHODCALLTYPE BTCpu64FlushInstructionCache(const void* Address, SIZE_T Size);
void STDMETHODCALLTYPE NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot);
void STDMETHODCALLTYPE NotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType);
void STDMETHODCALLTYPE NotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt);
void STDMETHODCALLTYPE NotifyUnmapViewOfSection(void* Address);
// Emulated CPU feature queries.
BOOLEAN STDMETHODCALLTYPE BTCpu64IsProcessorFeaturePresent(UINT Feature);
void STDMETHODCALLTYPE UpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info);
}

View File

@ -0,0 +1,28 @@
# ARM64EC module: a shared library built from the C++ and assembly sources plus the export definition file.
add_library(arm64ecfex SHARED
  Module.cpp
  Module.S
  libarm64ecfex.def)

patch_library_wine(arm64ecfex)

# Header search paths are private to this target.
target_include_directories(arm64ecfex
  PRIVATE
    "${CMAKE_SOURCE_DIR}/Source/Windows/include/"
    "${CMAKE_SOURCE_DIR}/Source/"
    "${CMAKE_SOURCE_DIR}/Source/Windows/")

target_link_libraries(arm64ecfex
  PRIVATE
    FEXCore
    FEXCore_Base
    Common
    CommonTools
    CommonWindows
    ntdll_ex
    ntdll)

# The runtime artifact is installed into lib as part of the runtime component.
install(
  TARGETS arm64ecfex
  RUNTIME
    DESTINATION lib
    COMPONENT runtime)

View File

@ -0,0 +1,80 @@
.text
.balign 16
// __os_arm64x_x64_jump in ARM64EC docs
// Expects target code address in x9
// Pushes lr as the return address, then shares the target check below with RetToEntryThunk.
.globl DispatchJump
DispatchJump:
str lr, [sp, #-8]! // Push return address to stack, this will be popped by the x86 RET instr.
b check_target_ec
// __os_arm64x_dispatch_ret in ARM64EC docs
// Expects target code address in lr
// Copies lr into x9 and falls straight through into check_target_ec.
.globl RetToEntryThunk
RetToEntryThunk:
mov x9, lr
check_target_ec:
// Check if target is in fact x86 code
// Tests the bit belonging to the target's 4KiB page in the EC code bitmap: a set bit branches to
// ExitFunctionEC, a clear bit enters the JIT. x16 and x17 are used as scratch.
ldr x16, [x18, #0x60] // TEB->PEB
ldr x16, [x16, #0x368] // PEB->EcCodeBitMap
lsr x17, x9, #15 // (page index / 64) * 8 once the low bits are masked off below
and x17, x17, #0x1fffffffffff8 // Byte offset of the 64-bit bitmap word holding this page's bit
ldr x16, [x16, x17]
lsr x17, x9, #12 // Page index; the register-controlled shift below only uses its low 6 bits
lsr x16, x16, x17 // Bring this page's bit down to bit 0
tbnz x16, #0, ExitFunctionEC
b enter_jit
// __os_arm64x_dispatch_call_no_redirect in ARM64EC docs
// Expects target code address in x9, and to be called using a 'blr x16' instruction.
// Unlike DispatchJump this performs no bitmap check: it pushes lr and falls through into enter_jit.
.globl ExitToX64
ExitToX64:
str lr, [sp, #-8]! // Push return address to stack, this will be popped by the x86 RET instr.
enter_jit:
// Tail-branch to the dispatcher entry stored in the thread's CPU area; x9 and x17 are left set for it.
ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo
ldr x16, [x17, #0x40] // ChpeV2CpuAreaInfo->EmulatorData[2] - DispatcherLoopTopEnterEC
br x16 // DispatcherLoopTopEnterEC(RIP:x9, CPUArea:x17)
// Invoked by KiUserEmulationDispatcher after e.g. an NtContinue to x86 code
// Expects a CONTEXT pointer in x0
// Calls SyncThreadContext (x0 is not modified before the call), then tail-branches to the dispatcher entry held
// in EmulatorData[3]. lr is overwritten by the bl and is not restored.
.global BeginSimulation
BeginSimulation:
bl "#SyncThreadContext"
ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo
ldr x16, [x17, #0x48] // ChpeV2CpuAreaInfo->EmulatorData[3] - DispatcherLoopTopEnterECFillSRA
br x16 // DispatcherLoopTopEnterECFillSRA(CPUArea:x17)
// Called into by FEXCore
// Expects the target code address in x9
// Also reached from check_target_ec when the target page is marked in the EC code bitmap.
// Uses x4, x16, x17 and x23 as scratch and may rewrite lr and sp.
.global ExitFunctionEC
ExitFunctionEC:
// Either return to an exit thunk (return to ARM64EC function) or call an entry thunk (call to ARM64EC function).
// It is assumed that a 'blr x16' instruction is only ever used to call into x86 code from an exit thunk, and that all
// exported ARM64EC functions have a 4-byte offset to their entry thunk immediately before their first instruction.
mov x17, x9
mov w16, #0x200
movk w16, #0xd63f, lsl 16 // blr x16
ldursw x23, [x17, #-0x4] // Load either the entry thunk offset or the calling instruction.
cmp w23, w16
beq ret_sp_aligned // The word before the target is 'blr x16': branch to the target itself, sp and lr untouched.
and x23, x23, #-0x4 // Clear the low two bits of the sign-extended offset.
add x17, x17, x23 // Resolve entry thunk address.
mov x4, sp
tbz x4, #3, ret_sp_misaligned // Bit 3 clear: popping 8 bytes would leave sp only 8-byte aligned.
ldr lr, [x4], #0x8 // Pop the return address into lr.
mov sp, x4
ret_sp_aligned:
br x17
ret_sp_misaligned:
// In the case of the x64 caller leaving sp only 8-byte aligned, leave the return address on the stack to keep 16-byte
// alignment and have the callee return to an x86 ret instruction. FEX can then return to the actual caller keeping
// the misaligned RSP.
adrp lr, X64ReturnInstr
ldr lr, [lr, #:lo12:X64ReturnInstr] // Load the pointer stored in the X64ReturnInstr variable.
br x17

View File

@ -0,0 +1,630 @@
// SPDX-License-Identifier: MIT
/*
$info$
tags: Bin|ARM64EC
desc: Implements the ARM64EC BT module API using FEXCore
$end_info$
*/
#include <FEXCore/fextl/fmt.h>
#include <FEXCore/Core/X86Enums.h>
#include <FEXCore/Core/SignalDelegator.h>
#include <FEXCore/Core/Context.h>
#include <FEXCore/Core/CoreState.h>
#include <FEXCore/Debug/InternalThreadState.h>
#include <FEXCore/HLE/SyscallHandler.h>
#include <FEXCore/Config/Config.h>
#include <FEXCore/Utils/Allocator.h>
#include <FEXCore/Utils/LogManager.h>
#include <FEXCore/Utils/Threads.h>
#include <FEXCore/Utils/EnumOperators.h>
#include <FEXCore/Utils/EnumUtils.h>
#include <FEXCore/Utils/FPState.h>
#include <FEXCore/Utils/ArchHelpers/Arm64.h>
#include <FEXCore/Utils/MathUtils.h>
#include <FEXCore/Utils/TypeDefines.h>
#include "Common/Config.h"
#include "Common/InvalidationTracker.h"
#include "Common/TSOHandlerConfig.h"
#include "Common/CPUFeatures.h"
#include "DummyHandlers.h"
#include "BTInterface.h"
#include <cstdint>
#include <cstdio>
#include <type_traits>
#include <mutex>
#include <optional>
#include <unordered_map>
#include <utility>
#include <ntstatus.h>
#include <windef.h>
#include <winternl.h>
#include <winnt.h>
#include <wine/debug.h>
class ECSyscallHandler;
void* X64ReturnInstr; // See Module.S
extern void* ExitFunctionEC;
// Thin view over a thread's CHPE_V2_CPU_AREA_INFO, located through a pointer stored at a fixed offset in the TEB.
// The four EmulatorData slots are exposed as typed references so callers can both read and assign them.
struct ThreadCPUArea {
  static constexpr size_t TEBCPUAreaOffset = 0x1788;
  CHPE_V2_CPU_AREA_INFO* Area;

  // Reads the CPU area pointer out of the given TEB.
  explicit ThreadCPUArea(_TEB* TEB) {
    const auto PointerAddress = reinterpret_cast<uintptr_t>(TEB) + TEBCPUAreaOffset;
    Area = *reinterpret_cast<CHPE_V2_CPU_AREA_INFO**>(PointerAddress);
  }

  uint64_t EmulatorStackLimit() const {
    return Area->EmulatorStackLimit;
  }

  uint64_t EmulatorStackBase() const {
    return Area->EmulatorStackBase;
  }

  ARM64EC_NT_CONTEXT& ContextAmd64() const {
    return *Area->ContextAmd64;
  }

  // EmulatorData[0]
  FEXCore::Core::CpuStateFrame*& StateFrame() const {
    return EmulatorSlot<FEXCore::Core::CpuStateFrame*>(0);
  }

  // EmulatorData[1]
  FEXCore::Core::InternalThreadState*& ThreadState() const {
    return EmulatorSlot<FEXCore::Core::InternalThreadState*>(1);
  }

  // EmulatorData[2], read by Module.S at CPU area offset 0x40
  uint64_t& DispatcherLoopTopEnterEC() const {
    return EmulatorSlot<uint64_t>(2);
  }

  // EmulatorData[3], read by Module.S at CPU area offset 0x48
  uint64_t& DispatcherLoopTopEnterECFillSRA() const {
    return EmulatorSlot<uint64_t>(3);
  }

private:
  // Reinterprets one pointer-sized EmulatorData slot as a reference to T.
  template<typename T>
  T& EmulatorSlot(size_t Index) const {
    return reinterpret_cast<T&>(Area->EmulatorData[Index]);
  }
};
namespace {
// Process-wide module state. Everything except the mutex and the thread map is created in ProcessInit.
fextl::unique_ptr<FEXCore::Context::Context> CTX;
fextl::unique_ptr<FEX::DummyHandlers::DummySignalDelegator> SignalDelegator;
fextl::unique_ptr<ECSyscallHandler> SyscallHandler;
// Left empty until ProcessInit emplaces them; the notification callbacks test InvalidationTracker before using it.
std::optional<FEX::Windows::InvalidationTracker> InvalidationTracker;
std::optional<FEX::Windows::CPUFeatures> CPUFeatures;
// Guards `Threads`; also held around InvalidationTracker calls in the notification callbacks.
std::recursive_mutex ThreadCreationMutex;
// Map of TIDs to their FEX thread state, `ThreadCreationMutex` must be locked when accessing
std::unordered_map<DWORD, FEXCore::Core::InternalThreadState*> Threads;
// Looks up the CPU area of an arbitrary thread through its TEB.
//
// Returns the NTSTATUS of the thread query together with the CPU area view. The view only describes `Thread` when
// the status is zero; callers must check the status before using it.
std::pair<NTSTATUS, ThreadCPUArea> GetThreadCPUArea(HANDLE Thread) {
  THREAD_BASIC_INFORMATION Info {};
  const NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr);
  if (Err) {
    // The ThreadCPUArea constructor dereferences the TEB it is given, so never hand it the unfilled query result.
    // The calling thread's TEB is always valid and serves purely as a placeholder next to the error status.
    return {Err, ThreadCPUArea(NtCurrentTeb())};
  }
  return {Err, ThreadCPUArea(reinterpret_cast<_TEB*>(Info.TebBaseAddress))};
}
// CPU area view of the calling thread.
ThreadCPUArea GetCPUArea() {
  auto* const CurrentTEB = NtCurrentTeb();
  return ThreadCPUArea {CurrentTEB};
}
// True when Address lies within [EmulatorStackLimit, EmulatorStackBase] of the calling thread, both ends inclusive.
bool IsEmulatorStackAddress(uint64_t Address) {
  if (Address > GetCPUArea().EmulatorStackBase()) {
    return false;
  }
  return Address >= GetCPUArea().EmulatorStackLimit();
}
// True when Address lies within the half-open range [DispatcherBegin, DispatcherEnd) from the signal delegator config.
bool IsDispatcherAddress(uint64_t Address) {
  const auto& DelegatorConfig = SignalDelegator->GetConfig();
  if (Address < DelegatorConfig.DispatcherBegin) {
    return false;
  }
  return Address < DelegatorConfig.DispatcherEnd;
}
// GetProcAddress on ARM64EC returns a pointer to an x64 fast forward sequence to allow for redirecting to the JIT if functions are
// hotpatched. This looks up the procedure address of the native code even if the fast forward sequence has been patched.
//
// Returns 0 when the export does not exist, when the module has no load config or CHPE metadata, or when the
// export's RVA has no entry in the module's redirection table.
uintptr_t GetRedirectedProcAddress(HMODULE Module, const char* ProcName) {
  const uintptr_t Proc = reinterpret_cast<uintptr_t>(GetProcAddress(Module, ProcName));
  if (!Proc) {
    return 0;
  }

  ULONG Size = 0;
  const auto* LoadConfig =
    reinterpret_cast<_IMAGE_LOAD_CONFIG_DIRECTORY64*>(RtlImageDirectoryEntryToData(Module, true, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG, &Size));
  if (!LoadConfig) {
    // No load config directory in this image, so there is no redirection metadata to consult.
    return 0;
  }

  const auto* CHPEMetadata = reinterpret_cast<IMAGE_ARM64EC_METADATA*>(LoadConfig->CHPEMetadataPointer);
  if (!CHPEMetadata) {
    return 0;
  }

  const uintptr_t ModuleBase = reinterpret_cast<uintptr_t>(Module);
  const uintptr_t ProcRVA = Proc - ModuleBase;
  const auto* RedirectionTableBegin = reinterpret_cast<IMAGE_ARM64EC_REDIRECTION_ENTRY*>(ModuleBase + CHPEMetadata->RedirectionMetadata);
  const auto* RedirectionTableEnd = RedirectionTableBegin + CHPEMetadata->RedirectionMetadataCount;

  // The binary search relies on the table being ordered by Source RVA.
  const auto* It =
    std::lower_bound(RedirectionTableBegin, RedirectionTableEnd, ProcRVA, [](const auto& Entry, uintptr_t RVA) { return Entry.Source < RVA; });

  // lower_bound yields the end pointer when every entry is below ProcRVA; that position must not be dereferenced.
  if (It == RedirectionTableEnd || It->Source != ProcRVA) {
    return 0;
  }
  return ModuleBase + It->Destination;
}
} // namespace
namespace Exception {
// Emplaced in ProcessInit; HandleUnalignedAccess reads the unaligned handler type from it.
static std::optional<FEX::Windows::TSOHandlerConfig> HandlerConfig;
// Address assigned in ProcessInit from GetRedirectedProcAddress (0 if that lookup failed); RethrowGuestException
// redirects the faulting context's PC to it.
static uintptr_t KiUserExceptionDispatcher;
// Builds the exception record to deliver for an exception raised while running guest code.
//
// Starts from a copy of Src with ExceptionAddress replaced by Context.Pc, and clears DF, TF and AF in the thread's
// guest flags. If the thread's synchronous fault data is not flagged as a generated exception the copy is returned
// as is. Otherwise the flag is consumed and the fault's Signal / TrapNo / err_code are translated into a Windows
// exception code; for some traps Context.Pc is adjusted as well. Unrecognised faults are logged and reported as
// EXCEPTION_ILLEGAL_INSTRUCTION.
static EXCEPTION_RECORD HandleGuestException(const EXCEPTION_RECORD& Src, ARM64_NT_CONTEXT& Context) {
auto* Thread = GetCPUArea().ThreadState();
auto& Fault = Thread->CurrentFrame->SynchronousFaultData;
EXCEPTION_RECORD Dst = Src;
Dst.ExceptionAddress = reinterpret_cast<void*>(Context.Pc);
// Windows always clears TF, DF and AF when handling an exception, restoring after.
// TODO: Check windows behaviour for the restoring after, quite awkward to achieve with the BT API. Would need to fixup flags after a
// rethrow and keep track of context pointers on the stack so if a SEH handler changes flags they can be restored in BeginContext after
// the NtContinue syscall (which will convert to an ARM64 context and back, losing these flags).
uint32_t EFlags = CTX->ReconstructCompactedEFLAGS(Thread, true, Context.X, Context.Cpsr);
EFlags &= ~((1 << FEXCore::X86State::RFLAG_DF_RAW_LOC) | (1 << FEXCore::X86State::RFLAG_TF_LOC) | (1 << FEXCore::X86State::RFLAG_AF_RAW_LOC));
CTX->SetFlagsFromCompactedEFLAGS(Thread, EFlags);
// Not flagged as a generated exception: pass the original record through with only the address rewritten.
if (!Fault.FaultToTopAndGeneratedException) {
return Dst;
}
// Consume the pending fault and rebuild the record's code and parameters from it.
Fault.FaultToTopAndGeneratedException = false;
Dst.ExceptionFlags = 0;
Dst.NumberParameters = 0;
switch (Fault.Signal) {
case FEXCore::Core::FAULT_SIGILL: Dst.ExceptionCode = EXCEPTION_ILLEGAL_INSTRUCTION; return Dst;
case FEXCore::Core::FAULT_SIGTRAP:
switch (Fault.TrapNo) {
case FEXCore::X86State::X86_TRAPNO_DB: Dst.ExceptionCode = EXCEPTION_SINGLE_STEP; return Dst;
case FEXCore::X86State::X86_TRAPNO_BP:
// Step the reported PC back by one byte and report the breakpoint at that address.
Context.Pc -= 1;
Dst.ExceptionAddress = reinterpret_cast<void*>(Context.Pc);
Dst.ExceptionCode = EXCEPTION_BREAKPOINT;
Dst.NumberParameters = 1;
Dst.ExceptionInformation[0] = 0;
return Dst;
default: LogMan::Msg::EFmt("Unknown SIGTRAP trap: {}", Fault.TrapNo); break;
}
break;
case FEXCore::Core::FAULT_SIGSEGV:
switch (Fault.TrapNo) {
case FEXCore::X86State::X86_TRAPNO_GP:
// Low three bits of err_code equal to 0b010 select the interrupt path; the remaining bits are used as the
// interrupt number. NOTE(review): presumably the x86 IDT-referencing error code layout - confirm.
if ((Fault.err_code & 0b111) == 0b010) {
switch (Fault.err_code >> 3) {
case 0x2d:
Context.Pc += 2;
Dst.ExceptionCode = EXCEPTION_BREAKPOINT;
Dst.ExceptionAddress = reinterpret_cast<void*>(Context.Pc + 1);
Dst.NumberParameters = 1;
Dst.ExceptionInformation[0] = Context.X8; // RAX
// Note that ExceptionAddress doesn't equal the reported context RIP here, this discrepancy expected and not having it can trigger anti-debug logic.
return Dst;
default: LogMan::Msg::EFmt("Unknown interrupt: 0x{:X}", Fault.err_code >> 3); break;
}
} else {
Dst.ExceptionCode = EXCEPTION_PRIV_INSTRUCTION;
return Dst;
}
break;
case FEXCore::X86State::X86_TRAPNO_OF: Dst.ExceptionCode = EXCEPTION_INT_OVERFLOW; return Dst;
default: LogMan::Msg::EFmt("Unknown SIGSEGV trap: {}", Fault.TrapNo); break;
}
break;
default: LogMan::Msg::EFmt("Unknown signal type: {}", Fault.Signal); break;
}
// Default to SIGILL
Dst.ExceptionCode = EXCEPTION_ILLEGAL_INSTRUCTION;
return Dst;
}
// Attempts to handle a misaligned access raised from inside the JIT code buffer.
// On success the context's PC is advanced by the amount the handler reports and true is returned; when the PC is
// outside the code buffer or the handler declines, the context is left untouched and false is returned.
static bool HandleUnalignedAccess(ARM64_NT_CONTEXT& Context) {
  const bool InCodeBuffer = CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context.Pc);
  if (InCodeBuffer) {
    const auto Outcome = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(GetCPUArea().ThreadState(),
                                                                            HandlerConfig->GetUnalignedHandlerType(), Context.Pc, &Context.X0);
    if (Outcome.first) {
      Context.Pc += Outcome.second;
      return true;
    }
  }
  return false;
}
// Loads the thread's guest CPU state from an x64 CONTEXT.
//
// Copies the sixteen general registers, RIP, EFLAGS, the segment selectors, the XMM registers and the x87 state
// (registers, control word, status-word condition bits and TOP, tag word) into Thread->CurrentFrame->State.
// Segment bases are not taken from Context: GS is pointed at the current TEB and all other cached bases are zeroed.
static void LoadStateFromECContext(FEXCore::Core::InternalThreadState* Thread, CONTEXT& Context) {
auto& State = Thread->CurrentFrame->State;
// General register state
State.gregs[FEXCore::X86State::REG_RAX] = Context.Rax;
State.gregs[FEXCore::X86State::REG_RCX] = Context.Rcx;
State.gregs[FEXCore::X86State::REG_RDX] = Context.Rdx;
State.gregs[FEXCore::X86State::REG_RBX] = Context.Rbx;
State.gregs[FEXCore::X86State::REG_RSP] = Context.Rsp;
State.gregs[FEXCore::X86State::REG_RBP] = Context.Rbp;
State.gregs[FEXCore::X86State::REG_RSI] = Context.Rsi;
State.gregs[FEXCore::X86State::REG_RDI] = Context.Rdi;
State.gregs[FEXCore::X86State::REG_R8] = Context.R8;
State.gregs[FEXCore::X86State::REG_R9] = Context.R9;
State.gregs[FEXCore::X86State::REG_R10] = Context.R10;
State.gregs[FEXCore::X86State::REG_R11] = Context.R11;
State.gregs[FEXCore::X86State::REG_R12] = Context.R12;
State.gregs[FEXCore::X86State::REG_R13] = Context.R13;
State.gregs[FEXCore::X86State::REG_R14] = Context.R14;
State.gregs[FEXCore::X86State::REG_R15] = Context.R15;
State.rip = Context.Rip;
CTX->SetFlagsFromCompactedEFLAGS(Thread, Context.EFlags);
// Segment selectors: only the low 16 bits of each context field are kept.
State.es_idx = Context.SegEs & 0xffff;
State.cs_idx = Context.SegCs & 0xffff;
State.ss_idx = Context.SegSs & 0xffff;
State.ds_idx = Context.SegDs & 0xffff;
State.fs_idx = Context.SegFs & 0xffff;
State.gs_idx = Context.SegGs & 0xffff;
// The TEB is the only populated GDT entry by default
const auto TEB = reinterpret_cast<uint64_t>(NtCurrentTeb());
// The GDT slot is the GS selector shifted right by three.
State.gdt[(Context.SegGs & 0xffff) >> 3].base = TEB;
State.gs_cached = TEB;
State.fs_cached = 0;
State.es_cached = 0;
State.cs_cached = 0;
State.ss_cached = 0;
State.ds_cached = 0;
// Floating-point register state
CTX->SetXMMRegistersFromState(Thread, reinterpret_cast<const __uint128_t*>(Context.FltSave.XmmRegisters), nullptr);
memcpy(State.mm, Context.FltSave.FloatRegisters, sizeof(State.mm));
State.FCW = Context.FltSave.ControlWord;
// Status word: C0, C1 and C2 come from bits 8-10, C3 from bit 14, TOP from bits 11-13.
State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (Context.FltSave.StatusWord >> 8) & 1;
State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (Context.FltSave.StatusWord >> 9) & 1;
State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (Context.FltSave.StatusWord >> 10) & 1;
State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (Context.FltSave.StatusWord >> 14) & 1;
State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (Context.FltSave.StatusWord >> 11) & 0b111;
State.AbridgedFTW = Context.FltSave.TagWord;
}
// Writes the guest state held in host registers back into the current thread's state frame: the guest RIP is
// recovered from the host PC, then every statically allocated guest GPR and vector register is copied out of the
// host context using the mappings in the signal delegator config.
static void ReconstructThreadState(ARM64_NT_CONTEXT& Context) {
  const auto& SRAConfig = SignalDelegator->GetConfig();
  auto* const CurrentThread = GetCPUArea().ThreadState();
  auto& GuestState = CurrentThread->CurrentFrame->State;

  GuestState.rip = CTX->RestoreRIPFromHostPC(CurrentThread, Context.Pc);

  // Spill all SRA GPRs
  size_t GPR = 0;
  while (GPR < SRAConfig.SRAGPRCount) {
    const auto HostReg = SRAConfig.SRAGPRMapping[GPR];
    GuestState.gregs[GPR] = Context.X[HostReg];
    ++GPR;
  }

  // Spill all SRA FPRs
  size_t FPR = 0;
  while (FPR < SRAConfig.SRAFPRCount) {
    const auto HostReg = SRAConfig.SRAFPRMapping[FPR];
    memcpy(GuestState.xmm.sse.data[FPR], &Context.V[HostReg], sizeof(__uint128_t));
    ++FPR;
  }
}
// Reconstructs an x64 context from the input context within the JIT, packed into a regular ARM64 context following the ARM64EC register mapping
// Calls ReconstructThreadState first, so the thread's state frame is updated as a side effect. The returned context
// carries the guest GPRs, RSP/RBP in Sp/Fp, RIP in Pc, the XMM registers in V, and the x87 registers split across
// Lr/X6/X7/X9/X10/X11/X12/X15 (low 64 bits each) and X16/X17 (low 16 bits of each upper half).
// Cpsr, Fpcr and Fpsr are copied from the input context.
static ARM64_NT_CONTEXT ReconstructPackedECContext(ARM64_NT_CONTEXT& Context) {
ReconstructThreadState(Context);
ARM64_NT_CONTEXT ECContext {};
ECContext.ContextFlags = CONTEXT_ARM64_CONTROL | CONTEXT_ARM64_INTEGER | CONTEXT_ARM64_FLOATING_POINT;
auto* Thread = GetCPUArea().ThreadState();
auto& State = Thread->CurrentFrame->State;
// Guest general registers, each placed in a fixed ARM64 register.
ECContext.X8 = State.gregs[FEXCore::X86State::REG_RAX];
ECContext.X0 = State.gregs[FEXCore::X86State::REG_RCX];
ECContext.X1 = State.gregs[FEXCore::X86State::REG_RDX];
ECContext.X27 = State.gregs[FEXCore::X86State::REG_RBX];
ECContext.Sp = State.gregs[FEXCore::X86State::REG_RSP];
ECContext.Fp = State.gregs[FEXCore::X86State::REG_RBP];
ECContext.X25 = State.gregs[FEXCore::X86State::REG_RSI];
ECContext.X26 = State.gregs[FEXCore::X86State::REG_RDI];
ECContext.X2 = State.gregs[FEXCore::X86State::REG_R8];
ECContext.X3 = State.gregs[FEXCore::X86State::REG_R9];
ECContext.X4 = State.gregs[FEXCore::X86State::REG_R10];
ECContext.X5 = State.gregs[FEXCore::X86State::REG_R11];
ECContext.X19 = State.gregs[FEXCore::X86State::REG_R12];
ECContext.X20 = State.gregs[FEXCore::X86State::REG_R13];
ECContext.X21 = State.gregs[FEXCore::X86State::REG_R14];
ECContext.X22 = State.gregs[FEXCore::X86State::REG_R15];
ECContext.Pc = State.rip;
CTX->ReconstructXMMRegisters(Thread, reinterpret_cast<__uint128_t*>(&ECContext.V[0]), nullptr);
// x87 registers 0-3: low 64 bits in Lr/X6/X7/X9; the low 16 bits of each upper half are packed into X16,
// register 0 in the lowest 16 bits.
ECContext.Lr = State.mm[0][0];
ECContext.X6 = State.mm[1][0];
ECContext.X7 = State.mm[2][0];
ECContext.X9 = State.mm[3][0];
ECContext.X16 = (State.mm[3][1] & 0xffff) << 48 | (State.mm[2][1] & 0xffff) << 32 | (State.mm[1][1] & 0xffff) << 16 | (State.mm[0][1] & 0xffff);
// x87 registers 4-7: same scheme using X10/X11/X12/X15 and X17.
ECContext.X10 = State.mm[4][0];
ECContext.X11 = State.mm[5][0];
ECContext.X12 = State.mm[6][0];
ECContext.X15 = State.mm[7][0];
ECContext.X17 = (State.mm[7][1] & 0xffff) << 48 | (State.mm[6][1] & 0xffff) << 32 | (State.mm[5][1] & 0xffff) << 16 | (State.mm[4][1] & 0xffff);
// Zero all disallowed registers
ECContext.X13 = 0;
ECContext.X14 = 0;
ECContext.X18 = 0;
ECContext.X23 = 0;
ECContext.X24 = 0;
ECContext.X28 = 0;
// NZCV will be converted into EFlags by ntdll, the rest are lost during exception handling.
// See HandleGuestException
ECContext.Cpsr = Context.Cpsr;
ECContext.Fpcr = Context.Fpcr;
ECContext.Fpsr = Context.Fpsr;
return ECContext;
}
// Redirects the faulting host context so that it resumes at KiUserExceptionDispatcher on the guest stack.
//
// A DispatchArgs block is carved out directly below the guest stack pointer (taken from the host register mapped
// to RSP and aligned down to 64 bytes). It is filled with the packed EC context and the translated exception
// record; Context.Sp is then pointed at the block and Context.Pc at the dispatcher.
static void RethrowGuestException(const EXCEPTION_RECORD& Rec, ARM64_NT_CONTEXT& Context) {
const auto& Config = SignalDelegator->GetConfig();
uint64_t GuestSp = Context.X[Config.SRAGPRMapping[static_cast<size_t>(FEXCore::X86State::REG_RSP)]];
// Align and Redzone are never written here; they only contribute to the size of the block.
struct DispatchArgs {
ARM64_NT_CONTEXT Context;
EXCEPTION_RECORD Rec;
uint64_t Align;
uint64_t Redzone[2];
}* Args = reinterpret_cast<DispatchArgs*>(FEXCore::AlignDown(GuestSp, 64)) - 1;
LogMan::Msg::DFmt("Reconstructing context");
Args->Context = ReconstructPackedECContext(Context);
LogMan::Msg::DFmt("pc: {:X} rip: {:X}", Context.Pc, Args->Context.Pc);
// HandleGuestException may adjust Args->Context.Pc, so it runs on the stored copy.
Args->Rec = HandleGuestException(Rec, Args->Context);
Context.Sp = reinterpret_cast<uint64_t>(Args);
Context.Pc = KiUserExceptionDispatcher;
}
} // namespace Exception
namespace Logging {
// Formats a log line as "[level][thread id in hex] message" and hands it to Wine's debug output.
static void MsgHandler(LogMan::DebugLevels Level, const char* Message) {
  const auto Line = fextl::fmt::format("[{}][{:X}] {}\n", LogMan::DebugLevelStr(Level), GetCurrentThreadId(), Message);
  __wine_dbg_output(Line.c_str());
}

// Formats an assertion message with an "[ASSERT]" prefix and hands it to Wine's debug output.
static void AssertHandler(const char* Message) {
  const auto Line = fextl::fmt::format("[ASSERT] {}\n", Message);
  __wine_dbg_output(Line.c_str());
}

// Installs both handlers into LogMan, the assertion handler first.
static void Init() {
  LogMan::Throw::InstallHandler(AssertHandler);
  LogMan::Msg::InstallHandler(MsgHandler);
}
} // namespace Logging
// Syscall handler handed to FEXCore by ProcessInit. Every query returns a fixed value; the only override that
// does work is MarkGuestExecutableRange, which forwards to the invalidation tracker.
class ECSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators {
public:
ECSyscallHandler() {
OSABI = FEXCore::HLE::SyscallOSABI::OS_WIN32;
}
// Ignores its arguments and always returns 0.
uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override {
return 0;
}
// Reports the same ABI for every syscall number: no arguments, no return value, host syscall number -1.
FEXCore::HLE::SyscallABI GetSyscallABI(uint64_t Syscall) override {
return {.NumArgs = 0, .HasReturn = false, .HostSyscallNumber = -1};
}
// Always returns a zeroed lookup result.
FEXCore::HLE::AOTIRCacheEntryLookupResult LookupAOTIRCacheEntry(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestAddr) override {
return {0, 0};
}
// Forwards the range to the invalidation tracker; InvalidationTracker is dereferenced without an emptiness check.
void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override {
InvalidationTracker->ReprotectRWXIntervals(Start, Length);
}
};
// Called from BeginSimulation in Module.S: loads the calling thread's guest state from the supplied x64 context.
extern "C" void SyncThreadContext(CONTEXT* Context) {
  Exception::LoadStateFromECContext(GetCPUArea().ThreadState(), *Context);
}
void ProcessInit() {
Logging::Init();
FEX::Config::InitializeConfigs();
FEXCore::Config::Initialize();
FEXCore::Config::AddLayer(FEX::Config::CreateGlobalMainLayer());
FEXCore::Config::AddLayer(FEX::Config::CreateMainLayer());
FEXCore::Config::Load();
FEXCore::Config::ReloadMetaLayer();
FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_IS64BIT_MODE, "1");
// Not applicable to Windows
FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOAUTOMIGRATION, "0");
FEXCore::Context::InitializeStaticTables(FEXCore::Context::MODE_64BIT);
SignalDelegator = fextl::make_unique<FEX::DummyHandlers::DummySignalDelegator>();
SyscallHandler = fextl::make_unique<ECSyscallHandler>();
Exception::HandlerConfig.emplace();
CTX = FEXCore::Context::Context::CreateNewContext();
CTX->SetSignalDelegator(SignalDelegator.get());
CTX->SetSyscallHandler(SyscallHandler.get());
CTX->InitCore();
InvalidationTracker.emplace(*CTX, Threads);
CPUFeatures.emplace(*CTX);
X64ReturnInstr = ::VirtualAlloc(nullptr, FEXCore::Utils::FEX_PAGE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE);
*reinterpret_cast<uint8_t*>(X64ReturnInstr) = 0xc3;
Exception::KiUserExceptionDispatcher = GetRedirectedProcAddress(GetModuleHandle("ntdll.dll"), "KiUserExceptionDispatcher");
}
// Process teardown hook. Intentionally empty: nothing created by ProcessInit is released here.
void ProcessTerm() {}
// Scope guard that sets InSyscallCallback in the calling thread's CPU area for its lifetime and restores the
// value seen at construction when destroyed.
class ScopedCallbackDisable {
public:
  ScopedCallbackDisable()
    : Prev(GetCPUArea().Area->InSyscallCallback) {
    GetCPUArea().Area->InSyscallCallback = true;
  }

  ~ScopedCallbackDisable() {
    GetCPUArea().Area->InSyscallCallback = Prev;
  }

private:
  // Value of InSyscallCallback observed when the guard was created.
  bool Prev;
};
// Exception hook of the module. Always returns STATUS_SUCCESS; *Continue is set to true only when the exception
// was handled here or redirected, and is otherwise left untouched.
//
// Checks run in this order:
//  1. Misaligned access that HandleUnalignedAccess can handle -> continue.
//  2. Access violation that the invalidation tracker attributes to tracked RWX memory -> continue.
//  3. PC neither in the JIT code buffer nor in the dispatcher -> nothing to do.
//  4. Otherwise, if this handler is running on the emulator stack, the exception is rethrown onto the guest stack.
NTSTATUS ResetToConsistentState(EXCEPTION_POINTERS* Ptrs, ARM64_NT_CONTEXT* Context, BOOLEAN* Continue) {
// Sets InSyscallCallback for the duration of this function.
ScopedCallbackDisable Guard;
const auto* Exception = Ptrs->ExceptionRecord;
if (Exception->ExceptionCode == EXCEPTION_DATATYPE_MISALIGNMENT && Exception::HandleUnalignedAccess(*Context)) {
LogMan::Msg::DFmt("Handled unaligned atomic: new pc: {:X}", Context->Pc);
*Continue = true;
return STATUS_SUCCESS;
}
if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) {
// ExceptionInformation[1] is used as the faulting address.
const auto FaultAddress = static_cast<uint64_t>(Exception->ExceptionInformation[1]);
bool HandledRWX = false;
// Only consult the tracker once it exists and this thread has a FEX thread state.
if (InvalidationTracker && GetCPUArea().ThreadState()) {
std::scoped_lock Lock(ThreadCreationMutex);
HandledRWX = InvalidationTracker->HandleRWXAccessViolation(FaultAddress);
}
if (HandledRWX) {
LogMan::Msg::DFmt("Handled self-modifying code: pc: {:X} fault: {:X}", Context->Pc, FaultAddress);
*Continue = true;
return STATUS_SUCCESS;
}
}
// NOTE(review): the thread state passed here may be null if this runs before ThreadInit - confirm that
// IsAddressInCodeBuffer tolerates that.
if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context->Pc) && !IsDispatcherAddress(Context->Pc)) {
return STATUS_SUCCESS;
}
// The address of this function's own frame tells which stack the handler is running on.
if (IsEmulatorStackAddress(reinterpret_cast<uint64_t>(__builtin_frame_address(0)))) {
Exception::RethrowGuestException(*Exception, *Context);
LogMan::Msg::DFmt("Rethrowing onto guest stack: {:X}", Context->Sp);
*Continue = true;
return STATUS_SUCCESS;
} else {
LogMan::Msg::EFmt("Unexpected exception in JIT code on guest stack");
return STATUS_SUCCESS;
}
}
// Forwards an allocation's range and protection to the invalidation tracker. Type is not used.
// Does nothing until the tracker exists and the calling thread has a FEX thread state.
void NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot) {
  if (InvalidationTracker.has_value() && GetCPUArea().ThreadState() != nullptr) {
    std::scoped_lock Lock(ThreadCreationMutex);
    const auto Start = reinterpret_cast<uint64_t>(Address);
    const auto Length = static_cast<uint64_t>(Size);
    InvalidationTracker->HandleMemoryProtectionNotification(Start, Length, Prot);
  }
}
// Invalidates tracked code when memory is freed: a zero Size invalidates the whole containing section, a
// non-zero Size only invalidates the given interval when FreeType includes MEM_DECOMMIT.
// Does nothing until the tracker exists and the calling thread has a FEX thread state.
void NotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType) {
  if (InvalidationTracker.has_value() && GetCPUArea().ThreadState() != nullptr) {
    std::scoped_lock Lock(ThreadCreationMutex);
    const auto Start = reinterpret_cast<uint64_t>(Address);
    if (Size == 0) {
      InvalidationTracker->InvalidateContainingSection(Start, true);
      return;
    }
    if ((FreeType & MEM_DECOMMIT) != 0) {
      InvalidationTracker->InvalidateAlignedInterval(Start, static_cast<uint64_t>(Size), true);
    }
  }
}
// Forwards a protection change for the given range to the invalidation tracker.
// Does nothing until the tracker exists and the calling thread has a FEX thread state.
void NotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt) {
  if (InvalidationTracker.has_value() && GetCPUArea().ThreadState() != nullptr) {
    std::scoped_lock Lock(ThreadCreationMutex);
    const auto Start = reinterpret_cast<uint64_t>(Address);
    const auto Length = static_cast<uint64_t>(Size);
    InvalidationTracker->HandleMemoryProtectionNotification(Start, Length, NewProt);
  }
}
// Invalidates the tracked section containing Address when a view is unmapped.
// Does nothing until the tracker exists and the calling thread has a FEX thread state.
void NotifyUnmapViewOfSection(void* Address) {
  if (InvalidationTracker.has_value() && GetCPUArea().ThreadState() != nullptr) {
    std::scoped_lock Lock(ThreadCreationMutex);
    const auto SectionAddress = reinterpret_cast<uint64_t>(Address);
    InvalidationTracker->InvalidateContainingSection(SectionAddress, true);
  }
}
// Invalidates the given interval in the invalidation tracker, passing false as the final argument where the
// free/unmap notifications pass true.
// Does nothing until the tracker exists and the calling thread has a FEX thread state.
void BTCpu64FlushInstructionCache(const void* Address, SIZE_T Size) {
  if (InvalidationTracker.has_value() && GetCPUArea().ThreadState() != nullptr) {
    std::scoped_lock Lock(ThreadCreationMutex);
    const auto Start = reinterpret_cast<uint64_t>(Address);
    const auto Length = static_cast<uint64_t>(Size);
    InvalidationTracker->InvalidateAlignedInterval(Start, Length, false);
  }
}
// Per-thread initialisation, run on the thread being set up. Always returns STATUS_SUCCESS.
//
// Creates a FEXCore thread, publishes its state frame and the two dispatcher entry points in the thread's CPU
// area (where Module.S reads them), seeds the guest state from a default x64 context, and registers the thread in
// `Threads`. The thread state pointer is stored into the CPU area last; the notification callbacks check that
// pointer before doing any work.
NTSTATUS ThreadInit() {
const auto CPUArea = GetCPUArea();
auto* Thread = CTX->CreateThread(0, 0);
Thread->CurrentFrame->Pointers.Common.ExitFunctionEC = reinterpret_cast<uintptr_t>(&ExitFunctionEC);
CPUArea.StateFrame() = Thread->CurrentFrame;
uint64_t EnterEC = Thread->CurrentFrame->Pointers.Common.DispatcherLoopTopEnterEC;
CPUArea.DispatcherLoopTopEnterEC() = EnterEC;
uint64_t EnterECFillSRA = Thread->CurrentFrame->Pointers.Common.DispatcherLoopTopEnterECFillSRA;
CPUArea.DispatcherLoopTopEnterECFillSRA() = EnterECFillSRA;
// Default x64 context: fixed segment selectors, EFLAGS, MXCSR and x87 control word; every field not named
// here is zero-initialised.
CPUArea.ContextAmd64() = {.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT,
.AMD64_SegCs = 0x33,
.AMD64_SegDs = 0x2b,
.AMD64_SegEs = 0x2b,
.AMD64_SegFs = 0x53,
.AMD64_SegGs = 0x2b,
.AMD64_SegSs = 0x2b,
.AMD64_EFlags = 0x202,
.AMD64_MxCsr = 0x1f80,
.AMD64_MxCsr_copy = 0x1f80,
.AMD64_ControlWord = 0x27f};
Exception::LoadStateFromECContext(Thread, CPUArea.ContextAmd64().AMD64_Context);
{
std::scoped_lock Lock(ThreadCreationMutex);
Threads.emplace(GetCurrentThreadId(), Thread);
}
CPUArea.ThreadState() = Thread;
return STATUS_SUCCESS;
}
// Tears down the FEX state of the given thread.
//
// Returns the NTSTATUS of the thread query if it fails, in which case no state has been modified; otherwise
// clears the thread state pointer in the thread's CPU area, removes the thread from `Threads`, destroys the
// FEXCore thread and returns STATUS_SUCCESS.
NTSTATUS ThreadTerm(HANDLE Thread) {
  // A single query yields both the TEB (for the CPU area) and the thread id (for the map). It is the only step
  // that can fail, so it runs before any state is touched: an error return must not leave the thread state
  // cleared from the CPU area yet still registered and never destroyed.
  THREAD_BASIC_INFORMATION Info;
  if (const NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); Err) {
    return Err;
  }

  const ThreadCPUArea CPUArea(reinterpret_cast<_TEB*>(Info.TebBaseAddress));
  auto* OldThreadState = CPUArea.ThreadState();
  CPUArea.ThreadState() = nullptr;

  {
    // `Threads` is keyed by DWORD thread ids.
    const auto ThreadTID = static_cast<DWORD>(reinterpret_cast<uintptr_t>(Info.ClientId.UniqueThread));
    std::scoped_lock Lock(ThreadCreationMutex);
    Threads.erase(ThreadTID);
  }

  // The pointer is null if ThreadInit never completed for this thread; there is then nothing to destroy.
  if (OldThreadState) {
    CTX->DestroyThread(OldThreadState);
  }
  return STATUS_SUCCESS;
}
// Reports whether CPUFeatures considers the given processor feature present, as TRUE or FALSE.
BOOLEAN BTCpu64IsProcessorFeaturePresent(UINT Feature) {
  if (CPUFeatures->IsFeaturePresent(Feature)) {
    return TRUE;
  }
  return FALSE;
}
// Lets CPUFeatures update the caller-provided processor information structure.
void UpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info) {
  auto& Features = *CPUFeatures;
  Features.UpdateInformation(Info);
}

View File

@ -0,0 +1,21 @@
; Export table of the ARM64EC module.
; The four entries marked DATA are the assembly labels defined in Module.S; all other entries are the functions
; declared in BTInterface.h.
LIBRARY libarm64ecfex.dll
EXPORTS
BTCpu64FlushInstructionCache
BTCpu64IsProcessorFeaturePresent
DispatchJump DATA
RetToEntryThunk DATA
ExitToX64 DATA
BeginSimulation DATA
; The two names below are commented out and therefore not exported.
; FlushInstructionCacheHeavy
; NotifyMapViewOfSection
NotifyMemoryAlloc
NotifyMemoryFree
NotifyMemoryProtect
NotifyUnmapViewOfSection
ProcessInit
ProcessTerm
ResetToConsistentState
ThreadInit
ThreadTerm
UpdateProcessorInformation

View File

@ -24,6 +24,8 @@ build_implib(wow64)
add_subdirectory(Common)
if (_M_ARM_64 AND (NOT _M_ARM_64EC))
if (_M_ARM_64EC)
add_subdirectory(ARM64EC)
elseif (_M_ARM_64)
add_subdirectory(WOW64)
endif()

View File

@ -23,5 +23,5 @@ target_link_libraries(wow64fex
install(TARGETS wow64fex
RUNTIME
DESTINATION bin
DESTINATION lib
COMPONENT runtime)

View File

@ -0,0 +1,128 @@
// SPDX-License-Identifier: LGPL-2.1-or-later
// SPDX-FileCopyrightText: Copyright (C) the Wine project
#pragma once
#include_next <winnt.h>
#ifdef __cplusplus
extern "C" {
#endif
/* Code integrity record embedded in the load config directory below (its CodeIntegrity field). */
typedef struct _IMAGE_LOAD_CONFIG_CODE_INTEGRITY {
WORD Flags;
WORD Catalog;
DWORD CatalogOffset;
DWORD Reserved;
} IMAGE_LOAD_CONFIG_CODE_INTEGRITY, *PIMAGE_LOAD_CONFIG_CODE_INTEGRITY;
/* 64-bit load config directory layout. The trailing comments give the byte offset of the field they follow.
 * Both the struct tag and the typedef names carry an extra leading underscore compared to the usual Windows
 * names - presumably to avoid clashing with the declarations from the wrapped <winnt.h>; confirm.
 * The ARM64EC module reads CHPEMetadataPointer from this structure. */
typedef struct __IMAGE_LOAD_CONFIG_DIRECTORY64 {
DWORD Size; /* 000 */
DWORD TimeDateStamp;
WORD MajorVersion;
WORD MinorVersion;
DWORD GlobalFlagsClear;
DWORD GlobalFlagsSet; /* 010 */
DWORD CriticalSectionDefaultTimeout;
ULONGLONG DeCommitFreeBlockThreshold;
ULONGLONG DeCommitTotalFreeThreshold; /* 020 */
ULONGLONG LockPrefixTable;
ULONGLONG MaximumAllocationSize; /* 030 */
ULONGLONG VirtualMemoryThreshold;
ULONGLONG ProcessAffinityMask; /* 040 */
DWORD ProcessHeapFlags;
WORD CSDVersion;
WORD DependentLoadFlags;
ULONGLONG EditList; /* 050 */
ULONGLONG SecurityCookie;
ULONGLONG SEHandlerTable; /* 060 */
ULONGLONG SEHandlerCount;
ULONGLONG GuardCFCheckFunctionPointer; /* 070 */
ULONGLONG GuardCFDispatchFunctionPointer;
ULONGLONG GuardCFFunctionTable; /* 080 */
ULONGLONG GuardCFFunctionCount;
DWORD GuardFlags; /* 090 */
IMAGE_LOAD_CONFIG_CODE_INTEGRITY CodeIntegrity;
ULONGLONG GuardAddressTakenIatEntryTable; /* 0a0 */
ULONGLONG GuardAddressTakenIatEntryCount;
ULONGLONG GuardLongJumpTargetTable; /* 0b0 */
ULONGLONG GuardLongJumpTargetCount;
ULONGLONG DynamicValueRelocTable; /* 0c0 */
ULONGLONG CHPEMetadataPointer;
ULONGLONG GuardRFFailureRoutine; /* 0d0 */
ULONGLONG GuardRFFailureRoutineFunctionPointer;
DWORD DynamicValueRelocTableOffset; /* 0e0 */
WORD DynamicValueRelocTableSection;
WORD Reserved2;
ULONGLONG GuardRFVerifyStackPointerFunctionPointer;
DWORD HotPatchTableOffset; /* 0f0 */
DWORD Reserved3;
ULONGLONG EnclaveConfigurationPointer;
ULONGLONG VolatileMetadataPointer; /* 100 */
ULONGLONG GuardEHContinuationTable;
ULONGLONG GuardEHContinuationCount; /* 110 */
ULONGLONG GuardXFGCheckFunctionPointer;
ULONGLONG GuardXFGDispatchFunctionPointer; /* 120 */
ULONGLONG GuardXFGTableDispatchFunctionPointer;
ULONGLONG CastGuardOsDeterminedFailureMode; /* 130 */
ULONGLONG GuardMemcpyFunctionPointer;
} _IMAGE_LOAD_CONFIG_DIRECTORY64, *_PIMAGE_LOAD_CONFIG_DIRECTORY64;
/* One code range entry: a 32-bit start value plus a length. The start value can be read either whole
 * (StartOffset) or as a 1-bit NativeCode flag followed by 31 address bits. */
typedef struct _IMAGE_CHPE_RANGE_ENTRY {
union {
ULONG StartOffset;
struct {
ULONG NativeCode : 1;
ULONG AddressBits : 31;
} DUMMYSTRUCTNAME;
} DUMMYUNIONNAME;
ULONG Length;
} IMAGE_CHPE_RANGE_ENTRY, *PIMAGE_CHPE_RANGE_ENTRY;
/* ARM64EC metadata block, reached through CHPEMetadataPointer in the load config directory. Every field is a
 * 32-bit value. The ARM64EC module reads RedirectionMetadata (added to the module base to locate the
 * redirection table) and RedirectionMetadataCount (the number of entries in that table). */
typedef struct _IMAGE_ARM64EC_METADATA {
ULONG Version;
ULONG CodeMap;
ULONG CodeMapCount;
ULONG CodeRangesToEntryPoints;
ULONG RedirectionMetadata;
ULONG __os_arm64x_dispatch_call_no_redirect;
ULONG __os_arm64x_dispatch_ret;
ULONG __os_arm64x_dispatch_call;
ULONG __os_arm64x_dispatch_icall;
ULONG __os_arm64x_dispatch_icall_cfg;
ULONG AlternateEntryPoint;
ULONG AuxiliaryIAT;
ULONG CodeRangesToEntryPointsCount;
ULONG RedirectionMetadataCount;
ULONG GetX64InformationFunctionPointer;
ULONG SetX64InformationFunctionPointer;
ULONG ExtraRFETable;
ULONG ExtraRFETableSize;
ULONG __os_arm64x_dispatch_fptr;
ULONG AuxiliaryIATCopy;
ULONG __os_arm64x_helper0;
ULONG __os_arm64x_helper1;
ULONG __os_arm64x_helper2;
ULONG __os_arm64x_helper3;
ULONG __os_arm64x_helper4;
ULONG __os_arm64x_helper5;
ULONG __os_arm64x_helper6;
ULONG __os_arm64x_helper7;
ULONG __os_arm64x_helper8;
} IMAGE_ARM64EC_METADATA;
/* One redirection table entry. The ARM64EC module treats both fields as offsets from the module base: it
 * searches the table by Source and resolves a match to module base + Destination. */
typedef struct _IMAGE_ARM64EC_REDIRECTION_ENTRY {
ULONG Source;
ULONG Destination;
} IMAGE_ARM64EC_REDIRECTION_ENTRY;
/* Associates a code range, given by its start and end RVA, with an entry point value. */
typedef struct _IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT {
ULONG StartRva;
ULONG EndRva;
ULONG EntryPoint;
} IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT;
#ifdef __cplusplus
}
#endif

View File

@ -13,6 +13,20 @@ extern "C" {
#define WOW64_TLS_MAX_NUMBER 19
#ifdef _M_ARM_64EC
/* Per-thread CPU area for ARM64EC emulation. The trailing comments give each field's byte offset.
 * The ARM64EC module uses the four EmulatorData slots as follows: [0] CPU state frame, [1] thread state,
 * [2] DispatcherLoopTopEnterEC, [3] DispatcherLoopTopEnterECFillSRA. */
typedef struct _CHPE_V2_CPU_AREA_INFO {
BOOLEAN InSimulation; /* 000 */
BOOLEAN InSyscallCallback; /* 001 */
ULONG64 EmulatorStackBase; /* 008 */
ULONG64 EmulatorStackLimit; /* 010 */
ARM64EC_NT_CONTEXT* ContextAmd64; /* 018 */
ULONG* SuspendDoorbell; /* 020 */
ULONG64 LoadingModuleModflag; /* 028 */
void* EmulatorData[4]; /* 030 */
ULONG64 EmulatorDataInline; /* 050 */
} CHPE_V2_CPU_AREA_INFO, *PCHPE_V2_CPU_AREA_INFO;
#endif
typedef struct _THREAD_BASIC_INFORMATION {
NTSTATUS ExitStatus;
PVOID TebBaseAddress;
@ -94,6 +108,7 @@ void WINAPI Wow64ProcessPendingCrossProcessItems(void);
NTSTATUS WINAPI RtlWow64SetThreadContext(HANDLE, const WOW64_CONTEXT*);
NTSTATUS WINAPI RtlWow64GetThreadContext(HANDLE, WOW64_CONTEXT*);
NTSTATUS WINAPI RtlWow64GetCurrentCpuArea(USHORT*, void**, void**);
NTSYSAPI PVOID WINAPI RtlImageDirectoryEntryToData(HMODULE, BOOL, WORD, ULONG*);
NTSTATUS WINAPI NtSuspendThread(HANDLE, PULONG);
NTSTATUS WINAPI NtGetContextThread(HANDLE, CONTEXT*);