From 3b189f6d7d242d24349eef2647fc8c37b144f5e7 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Wed, 26 Jun 2024 13:03:57 +0000 Subject: [PATCH 01/13] WOW64: Install into lib This convention is used by most other projects. --- Source/Windows/WOW64/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Source/Windows/WOW64/CMakeLists.txt b/Source/Windows/WOW64/CMakeLists.txt index ace1a3309..0732a2aa2 100644 --- a/Source/Windows/WOW64/CMakeLists.txt +++ b/Source/Windows/WOW64/CMakeLists.txt @@ -23,5 +23,5 @@ target_link_libraries(wow64fex install(TARGETS wow64fex RUNTIME - DESTINATION bin + DESTINATION lib COMPONENT runtime) From 549e06aade964d2d2ebf63046c5334ea041cd519 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sat, 6 Jan 2024 23:08:22 +0000 Subject: [PATCH 02/13] CMake: Enable assembly source file support --- CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index a26f709a0..ccce9f1e6 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.14) -project(FEX) +project(FEX C CXX ASM) INCLUDE (CheckIncludeFiles) CHECK_INCLUDE_FILES ("gdb/jit-reader.h" HAVE_GDB_JIT_READER_H) From 5dc85307a6e05e7e8f98f75fa5e8521506d4cee5 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 7 Jul 2024 22:43:09 +0100 Subject: [PATCH 03/13] Windows: Introduce an initial ARM64EC frontend This allows for running x64 applications under wine without having to run all of wine under FEX. The JIT is invoked when ARM64EC code performs an indirect branch to x64 code, and left whenever the x64 code calls into ARM64EC code. 
--- Source/Windows/ARM64EC/BTInterface.h | 21 ++ Source/Windows/ARM64EC/CMakeLists.txt | 28 +++ Source/Windows/ARM64EC/Module.S | 62 +++++ Source/Windows/ARM64EC/Module.cpp | 273 +++++++++++++++++++++++ Source/Windows/ARM64EC/libarm64ecfex.def | 21 ++ Source/Windows/CMakeLists.txt | 4 +- Source/Windows/include/winternl.h | 14 ++ 7 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 Source/Windows/ARM64EC/BTInterface.h create mode 100644 Source/Windows/ARM64EC/CMakeLists.txt create mode 100644 Source/Windows/ARM64EC/Module.S create mode 100644 Source/Windows/ARM64EC/Module.cpp create mode 100644 Source/Windows/ARM64EC/libarm64ecfex.def diff --git a/Source/Windows/ARM64EC/BTInterface.h b/Source/Windows/ARM64EC/BTInterface.h new file mode 100644 index 000000000..7f14a5f2e --- /dev/null +++ b/Source/Windows/ARM64EC/BTInterface.h @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: MIT +#pragma once + +#include +#include +#include + +extern "C" { +void STDMETHODCALLTYPE ProcessInit(); +void STDMETHODCALLTYPE ProcessTerm(); +NTSTATUS STDMETHODCALLTYPE ThreadInit(); +NTSTATUS STDMETHODCALLTYPE ThreadTerm(HANDLE Thread); +NTSTATUS STDMETHODCALLTYPE ResetToConsistentState(EXCEPTION_POINTERS* Ptrs, ARM64_NT_CONTEXT* Context, BOOLEAN* Continue); +void STDMETHODCALLTYPE BTCpu64FlushInstructionCache(const void* Address, SIZE_T Size); +void STDMETHODCALLTYPE NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot); +void STDMETHODCALLTYPE NotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType); +void STDMETHODCALLTYPE NotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt); +void STDMETHODCALLTYPE NotifyUnmapViewOfSection(void* Address); +BOOLEAN STDMETHODCALLTYPE BTCpu64IsProcessorFeaturePresent(UINT Feature); +void STDMETHODCALLTYPE UpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info); +} diff --git a/Source/Windows/ARM64EC/CMakeLists.txt b/Source/Windows/ARM64EC/CMakeLists.txt new file mode 100644 index 000000000..e2f26a7c3 --- /dev/null 
+++ b/Source/Windows/ARM64EC/CMakeLists.txt @@ -0,0 +1,28 @@ +add_library(arm64ecfex SHARED + Module.cpp + Module.S + libarm64ecfex.def +) +patch_library_wine(arm64ecfex) + +target_include_directories(arm64ecfex PRIVATE + "${CMAKE_SOURCE_DIR}/Source/Windows/include/" + "${CMAKE_SOURCE_DIR}/Source/" + "${CMAKE_SOURCE_DIR}/Source/Windows/" +) + +target_link_libraries(arm64ecfex + PRIVATE + FEXCore + FEXCore_Base + Common + CommonTools + CommonWindows + ntdll_ex + ntdll +) + +install(TARGETS arm64ecfex + RUNTIME + DESTINATION lib + COMPONENT runtime) diff --git a/Source/Windows/ARM64EC/Module.S b/Source/Windows/ARM64EC/Module.S new file mode 100644 index 000000000..2d24ecf4c --- /dev/null +++ b/Source/Windows/ARM64EC/Module.S @@ -0,0 +1,62 @@ +.text +.balign 16 + + // __os_arm64x_x64_jump in ARM64EC docs + // Expects target code address in x9 +.globl DispatchJump +DispatchJump: + str lr, [sp, #-8]! // Push return address to stack, this will be popped by the x86 RET instr. + b check_target_ec + + // __os_arm64x_dispatch_ret in ARM64EC docs + // Expects target code address in lr +.globl RetToEntryThunk +RetToEntryThunk: + mov x9, lr + +check_target_ec: + // Check if target is in fact x86 code + ldr x16, [x18, #0x60] // TEB->PEB + ldr x16, [x16, #0x368] // PEB->EcCodeBitMap + lsr x17, x9, #15 + and x17, x17, #0x1fffffffffff8 + ldr x16, [x16, x17] + lsr x17, x9, #12 + lsr x16, x16, x17 + tbnz x16, #0, ExitFunctionEC + b enter_jit + + // __os_arm64x_dispatch_call_no_redirect in ARM64EC docs + // Expects target code address in x9, and to be called using a 'blr x16' instruction. +.globl ExitToX64 +ExitToX64: + str lr, [sp, #-8]! // Push return address to stack, this will be popped by the x86 RET instr. 
+ +enter_jit: + ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo + ldr x16, [x17, #0x40] // ChpeV2CpuAreaInfo->EmulatorData[2] - DispatcherLoopTopEnterEC + br x16 // DispatcherLoopTopEnterEC(RIP:x9, CPUArea:x17) + + // Called into by FEXCore + // Expects the target code address in x9 +.global ExitFunctionEC +ExitFunctionEC: + // Either return to an exit thunk (return to ARM64EC function) or call an entry thunk (call to ARM64EC function). + // It is assumed that a 'blr x16' instruction is only ever used to call into x86 code from an exit thunk, and that all + // exported ARM64EC functions have a 4-byte offset to their entry thunk immediately before their first instruction. + mov x17, x9 + mov w16, #0x200 + movk w16, #0xd63f, lsl 16 // blr x16 + ldursw x23, [x17, #-0x4] // Load either the entry thunk offset or the calling instruction. + cmp w23, w16 + beq ret_sp_aligned + + and x23, x23, #-0x4 + add x17, x17, x23 // Resolve entry thunk address. + + mov x4, sp + ldr lr, [x4], #0x8 // Pop the return address into lr. 
+ mov sp, x4 + +ret_sp_aligned: + br x17 diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp new file mode 100644 index 000000000..a31f4c06b --- /dev/null +++ b/Source/Windows/ARM64EC/Module.cpp @@ -0,0 +1,273 @@ +// SPDX-License-Identifier: MIT +/* +$info$ +tags: Bin|ARM64EC +desc: Implements the ARM64EC BT module API using FEXCore +$end_info$ +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "Common/Config.h" +#include "Common/InvalidationTracker.h" +#include "Common/CPUFeatures.h" +#include "DummyHandlers.h" +#include "BTInterface.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +class ECSyscallHandler; +extern void* ExitFunctionEC; + +struct ThreadCPUArea { + static constexpr size_t TEBCPUAreaOffset = 0x1788; + CHPE_V2_CPU_AREA_INFO* Area; + + explicit ThreadCPUArea(_TEB* TEB) + : Area(*reinterpret_cast(reinterpret_cast(TEB) + TEBCPUAreaOffset)) {} + + uint64_t EmulatorStackLimit() const { + return Area->EmulatorStackLimit; + } + + uint64_t EmulatorStackBase() const { + return Area->EmulatorStackBase; + } + + FEXCore::Core::CpuStateFrame*& StateFrame() const { + return reinterpret_cast(Area->EmulatorData[0]); + } + + FEXCore::Core::InternalThreadState*& ThreadState() const { + return reinterpret_cast(Area->EmulatorData[1]); + } + + uint64_t& DispatcherLoopTopEnterEC() const { + return reinterpret_cast(Area->EmulatorData[2]); + } + + uint64_t& DispatcherLoopTopEnterECFillSRA() const { + return reinterpret_cast(Area->EmulatorData[3]); + } +}; + +namespace { +fextl::unique_ptr CTX; +fextl::unique_ptr SignalDelegator; +fextl::unique_ptr SyscallHandler; +std::optional InvalidationTracker; +std::optional CPUFeatures; + +std::recursive_mutex ThreadCreationMutex; +// Map of TIDs to their FEX thread state, `ThreadCreationMutex` must be 
locked when accessing +std::unordered_map Threads; + + +std::pair GetThreadCPUArea(HANDLE Thread) { + THREAD_BASIC_INFORMATION Info; + const NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); + return {Err, ThreadCPUArea(Err ? NtCurrentTeb() : reinterpret_cast<_TEB*>(Info.TebBaseAddress))}; // Info is uninitialized when the query fails; fall back to the current TEB so the constructor never reads through a garbage pointer. Callers must still check Err before using the area. +} + +ThreadCPUArea GetCPUArea() { + return ThreadCPUArea(NtCurrentTeb()); +} + +} // namespace + +namespace Logging { +static void MsgHandler(LogMan::DebugLevels Level, const char* Message) { + const auto Output = fextl::fmt::format("[{}][{:X}] {}\n", LogMan::DebugLevelStr(Level), GetCurrentThreadId(), Message); + __wine_dbg_output(Output.c_str()); +} + +static void AssertHandler(const char* Message) { + const auto Output = fextl::fmt::format("[ASSERT] {}\n", Message); + __wine_dbg_output(Output.c_str()); +} + +static void Init() { + LogMan::Throw::InstallHandler(AssertHandler); + LogMan::Msg::InstallHandler(MsgHandler); +} +} // namespace Logging + +class ECSyscallHandler : public FEXCore::HLE::SyscallHandler, public FEXCore::Allocator::FEXAllocOperators { +public: + ECSyscallHandler() { + OSABI = FEXCore::HLE::SyscallOSABI::OS_WIN32; + } + + uint64_t HandleSyscall(FEXCore::Core::CpuStateFrame* Frame, FEXCore::HLE::SyscallArguments* Args) override { + return 0; + } + + FEXCore::HLE::SyscallABI GetSyscallABI(uint64_t Syscall) override { + return {.NumArgs = 0, .HasReturn = false, .HostSyscallNumber = -1}; + } + + FEXCore::HLE::AOTIRCacheEntryLookupResult LookupAOTIRCacheEntry(FEXCore::Core::InternalThreadState* Thread, uint64_t GuestAddr) override { + return {0, 0}; + } + + void MarkGuestExecutableRange(FEXCore::Core::InternalThreadState* Thread, uint64_t Start, uint64_t Length) override { + InvalidationTracker->ReprotectRWXIntervals(Start, Length); + } +}; + +void ProcessInit() { + Logging::Init(); + FEX::Config::InitializeConfigs(); + FEXCore::Config::Initialize(); + FEXCore::Config::AddLayer(FEX::Config::CreateGlobalMainLayer()); +
FEXCore::Config::AddLayer(FEX::Config::CreateMainLayer()); + FEXCore::Config::Load(); + FEXCore::Config::ReloadMetaLayer(); + + FEXCore::Config::EraseSet(FEXCore::Config::CONFIG_IS64BIT_MODE, "1"); + + // Not applicable to Windows + FEXCore::Config::EraseSet(FEXCore::Config::ConfigOption::CONFIG_TSOAUTOMIGRATION, "0"); + + FEXCore::Context::InitializeStaticTables(FEXCore::Context::MODE_64BIT); + + SignalDelegator = fextl::make_unique(); + SyscallHandler = fextl::make_unique(); + + CTX = FEXCore::Context::Context::CreateNewContext(); + CTX->SetSignalDelegator(SignalDelegator.get()); + CTX->SetSyscallHandler(SyscallHandler.get()); + CTX->InitCore(); + InvalidationTracker.emplace(*CTX, Threads); + CPUFeatures.emplace(*CTX); +} + +void ProcessTerm() {} + +void NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot) { + if (!InvalidationTracker || !GetCPUArea().ThreadState()) { + return; + } + + std::scoped_lock Lock(ThreadCreationMutex); + InvalidationTracker->HandleMemoryProtectionNotification(reinterpret_cast(Address), static_cast(Size), Prot); +} + +void NotifyMemoryFree(void* Address, SIZE_T Size, ULONG FreeType) { + if (!InvalidationTracker || !GetCPUArea().ThreadState()) { + return; + } + + std::scoped_lock Lock(ThreadCreationMutex); + if (!Size) { + InvalidationTracker->InvalidateContainingSection(reinterpret_cast(Address), true); + } else if (FreeType & MEM_DECOMMIT) { + InvalidationTracker->InvalidateAlignedInterval(reinterpret_cast(Address), static_cast(Size), true); + } +} + +void NotifyMemoryProtect(void* Address, SIZE_T Size, ULONG NewProt) { + if (!InvalidationTracker || !GetCPUArea().ThreadState()) { + return; + } + + std::scoped_lock Lock(ThreadCreationMutex); + InvalidationTracker->HandleMemoryProtectionNotification(reinterpret_cast(Address), static_cast(Size), NewProt); +} + +void NotifyUnmapViewOfSection(void* Address) { + if (!InvalidationTracker || !GetCPUArea().ThreadState()) { + return; + } + + std::scoped_lock 
Lock(ThreadCreationMutex); + InvalidationTracker->InvalidateContainingSection(reinterpret_cast(Address), true); +} + +void BTCpu64FlushInstructionCache(const void* Address, SIZE_T Size) { + if (!InvalidationTracker || !GetCPUArea().ThreadState()) { + return; + } + + std::scoped_lock Lock(ThreadCreationMutex); + InvalidationTracker->InvalidateAlignedInterval(reinterpret_cast(Address), static_cast(Size), false); +} + +NTSTATUS ThreadInit() { + const auto CPUArea = GetCPUArea(); + + auto* Thread = CTX->CreateThread(0, 0); + Thread->CurrentFrame->Pointers.Common.ExitFunctionEC = reinterpret_cast(&ExitFunctionEC); + CPUArea.StateFrame() = Thread->CurrentFrame; + + uint64_t EnterEC = Thread->CurrentFrame->Pointers.Common.DispatcherLoopTopEnterEC; + CPUArea.DispatcherLoopTopEnterEC() = EnterEC; + + uint64_t EnterECFillSRA = Thread->CurrentFrame->Pointers.Common.DispatcherLoopTopEnterECFillSRA; + CPUArea.DispatcherLoopTopEnterECFillSRA() = EnterECFillSRA; + + { + std::scoped_lock Lock(ThreadCreationMutex); + Threads.emplace(GetCurrentThreadId(), Thread); + } + + CPUArea.ThreadState() = Thread; + return STATUS_SUCCESS; +} + +NTSTATUS ThreadTerm(HANDLE Thread) { + const auto [Err, CPUArea] = GetThreadCPUArea(Thread); + if (Err) { + return Err; + } + auto* OldThreadState = CPUArea.ThreadState(); + CPUArea.ThreadState() = nullptr; + + { + THREAD_BASIC_INFORMATION Info; + if (NTSTATUS Err = NtQueryInformationThread(Thread, ThreadBasicInformation, &Info, sizeof(Info), nullptr); Err) { + return Err; + } + + const auto ThreadTID = reinterpret_cast(Info.ClientId.UniqueThread); + std::scoped_lock Lock(ThreadCreationMutex); + Threads.erase(ThreadTID); + } + + CTX->DestroyThread(OldThreadState); + return STATUS_SUCCESS; +} + +BOOLEAN BTCpu64IsProcessorFeaturePresent(UINT Feature) { + return CPUFeatures->IsFeaturePresent(Feature) ? 
TRUE : FALSE; +} + +void UpdateProcessorInformation(SYSTEM_CPU_INFORMATION* Info) { + CPUFeatures->UpdateInformation(Info); +} diff --git a/Source/Windows/ARM64EC/libarm64ecfex.def b/Source/Windows/ARM64EC/libarm64ecfex.def new file mode 100644 index 000000000..e364baf2d --- /dev/null +++ b/Source/Windows/ARM64EC/libarm64ecfex.def @@ -0,0 +1,21 @@ +LIBRARY libarm64ecfex.dll + +EXPORTS + BTCpu64FlushInstructionCache + BTCpu64IsProcessorFeaturePresent + DispatchJump DATA + RetToEntryThunk DATA + ExitToX64 DATA + BeginSimulation DATA +; FlushInstructionCacheHeavy +; NotifyMapViewOfSection + NotifyMemoryAlloc + NotifyMemoryFree + NotifyMemoryProtect + NotifyUnmapViewOfSection + ProcessInit + ProcessTerm +; ResetToConsistentState + ThreadInit + ThreadTerm + UpdateProcessorInformation diff --git a/Source/Windows/CMakeLists.txt b/Source/Windows/CMakeLists.txt index 9b58ec9d0..431e37db7 100644 --- a/Source/Windows/CMakeLists.txt +++ b/Source/Windows/CMakeLists.txt @@ -24,6 +24,8 @@ build_implib(wow64) add_subdirectory(Common) -if (_M_ARM_64 AND (NOT _M_ARM_64EC)) +if (_M_ARM_64EC) + add_subdirectory(ARM64EC) +elseif (_M_ARM_64) add_subdirectory(WOW64) endif() diff --git a/Source/Windows/include/winternl.h b/Source/Windows/include/winternl.h index cf5a4d25a..365d9e0da 100644 --- a/Source/Windows/include/winternl.h +++ b/Source/Windows/include/winternl.h @@ -13,6 +13,20 @@ extern "C" { #define WOW64_TLS_MAX_NUMBER 19 +#ifdef _M_ARM_64EC +typedef struct _CHPE_V2_CPU_AREA_INFO { + BOOLEAN InSimulation; /* 000 */ + BOOLEAN InSyscallCallback; /* 001 */ + ULONG64 EmulatorStackBase; /* 008 */ + ULONG64 EmulatorStackLimit; /* 010 */ + ARM64EC_NT_CONTEXT* ContextAmd64; /* 018 */ + ULONG* SuspendDoorbell; /* 020 */ + ULONG64 LoadingModuleModflag; /* 028 */ + void* EmulatorData[4]; /* 030 */ + ULONG64 EmulatorDataInline; /* 050 */ +} CHPE_V2_CPU_AREA_INFO, *PCHPE_V2_CPU_AREA_INFO; +#endif + typedef struct _THREAD_BASIC_INFORMATION { NTSTATUS ExitStatus; PVOID TebBaseAddress; From 
1059279d5d7bd81e23b86ebc62d7217056ac1f0d Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Sun, 7 Jul 2024 23:24:28 +0100 Subject: [PATCH 04/13] ARM64EC: Handle calls into ARM64EC code with an 8-byte-aligned SP ARM64 requires that SP is always 16-byte aligned for memory accesses, but ARM64EC shares the SP between x64 code and ARM64 code, the former of which doesn't enforce such a restriction. This causes crashes in programs such as HITMAN 3 that don't correctly follow the Windows ABI and call into system library functions with SP only 8-byte-aligned. Fixup stack alignment in such cases by leaving the 8-byte return address on the stack and returning to a lone 'ret' instruction instead. --- Source/Windows/ARM64EC/Module.S | 9 +++++++++ Source/Windows/ARM64EC/Module.cpp | 4 ++++ 2 files changed, 13 insertions(+) diff --git a/Source/Windows/ARM64EC/Module.S b/Source/Windows/ARM64EC/Module.S index 2d24ecf4c..b7d8516fa 100644 --- a/Source/Windows/ARM64EC/Module.S +++ b/Source/Windows/ARM64EC/Module.S @@ -55,8 +55,17 @@ ExitFunctionEC: add x17, x17, x23 // Resolve entry thunk address. mov x4, sp + tbz x4, #3, ret_sp_misaligned ldr lr, [x4], #0x8 // Pop the return address into lr. mov sp, x4 ret_sp_aligned: br x17 + +ret_sp_misaligned: + // In the case of the x64 caller leaving sp only 8-byte aligned, leave the return address on the stack to keep 16-byte + // alignment and have the callee return to an x86 ret instruction. FEX can then return to the actual caller keeping + // the misaligned RSP. 
+ adrp lr, X64ReturnInstr + ldr lr, [lr, #:lo12:X64ReturnInstr] + br x17 diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index a31f4c06b..6b16f9bf3 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -43,6 +43,7 @@ $end_info$ #include class ECSyscallHandler; +void* X64ReturnInstr; // See Module.S extern void* ExitFunctionEC; struct ThreadCPUArea { @@ -166,6 +167,9 @@ void ProcessInit() { CTX->InitCore(); InvalidationTracker.emplace(*CTX, Threads); CPUFeatures.emplace(*CTX); + + X64ReturnInstr = ::VirtualAlloc(nullptr, FEXCore::Utils::FEX_PAGE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE); + *reinterpret_cast(X64ReturnInstr) = 0xc3; } void ProcessTerm() {} From b9da95838ace4dda3a33cddc322f6921d7bb257b Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 11 Jul 2024 12:24:26 +0100 Subject: [PATCH 05/13] ARM64EC: Handle unaligned atomic accesses --- Source/Windows/ARM64EC/Module.cpp | 62 ++++++++++++++++++++++++ Source/Windows/ARM64EC/libarm64ecfex.def | 2 +- 2 files changed, 63 insertions(+), 1 deletion(-) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index 6b16f9bf3..fdc47a122 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -26,6 +26,7 @@ $end_info$ #include "Common/Config.h" #include "Common/InvalidationTracker.h" +#include "Common/TSOHandlerConfig.h" #include "Common/CPUFeatures.h" #include "DummyHandlers.h" #include "BTInterface.h" @@ -100,8 +101,35 @@ ThreadCPUArea GetCPUArea() { return ThreadCPUArea(NtCurrentTeb()); } +bool IsEmulatorStackAddress(uint64_t Address) { + return Address <= GetCPUArea().EmulatorStackBase() && Address >= GetCPUArea().EmulatorStackLimit(); +} + +bool IsDispatcherAddress(uint64_t Address) { + const auto& Config = SignalDelegator->GetConfig(); + return Address >= Config.DispatcherBegin && Address < Config.DispatcherEnd; +} } // namespace +namespace Exception { +static std::optional 
HandlerConfig; + +static bool HandleUnalignedAccess(ARM64_NT_CONTEXT& Context) { + if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context.Pc)) { + return false; + } + + const auto Result = FEXCore::ArchHelpers::Arm64::HandleUnalignedAccess(GetCPUArea().ThreadState(), + HandlerConfig->GetUnalignedHandlerType(), Context.Pc, &Context.X0); + if (!Result.first) { + return false; + } + + Context.Pc += Result.second; + return true; +} +} // namespace Exception + namespace Logging { static void MsgHandler(LogMan::DebugLevels Level, const char* Message) { const auto Output = fextl::fmt::format("[{}][{:X}] {}\n", LogMan::DebugLevelStr(Level), GetCurrentThreadId(), Message); @@ -160,6 +188,7 @@ void ProcessInit() { SignalDelegator = fextl::make_unique(); SyscallHandler = fextl::make_unique(); + Exception::HandlerConfig.emplace(); CTX = FEXCore::Context::Context::CreateNewContext(); CTX->SetSignalDelegator(SignalDelegator.get()); @@ -174,6 +203,39 @@ void ProcessInit() { void ProcessTerm() {} +class ScopedCallbackDisable { +private: + bool Prev; + +public: + ScopedCallbackDisable() { + Prev = GetCPUArea().Area->InSyscallCallback; + GetCPUArea().Area->InSyscallCallback = true; + } + + ~ScopedCallbackDisable() { + GetCPUArea().Area->InSyscallCallback = Prev; + } +}; + +NTSTATUS ResetToConsistentState(EXCEPTION_POINTERS* Ptrs, ARM64_NT_CONTEXT* Context, BOOLEAN* Continue) { + ScopedCallbackDisable Guard; + const auto* Exception = Ptrs->ExceptionRecord; + if (Exception->ExceptionCode == EXCEPTION_DATATYPE_MISALIGNMENT && Exception::HandleUnalignedAccess(*Context)) { + LogMan::Msg::DFmt("Handled unaligned atomic: new pc: {:X}", Context->Pc); + *Continue = true; + return STATUS_SUCCESS; + } + + if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context->Pc) && !IsDispatcherAddress(Context->Pc)) { + return STATUS_SUCCESS; + } + + LogMan::Msg::EFmt("Exception rethrow is unimplemented"); + + return STATUS_SUCCESS; +} + void NotifyMemoryAlloc(void* Address, SIZE_T 
Size, ULONG Type, ULONG Prot) { if (!InvalidationTracker || !GetCPUArea().ThreadState()) { return; diff --git a/Source/Windows/ARM64EC/libarm64ecfex.def b/Source/Windows/ARM64EC/libarm64ecfex.def index e364baf2d..035e03be0 100644 --- a/Source/Windows/ARM64EC/libarm64ecfex.def +++ b/Source/Windows/ARM64EC/libarm64ecfex.def @@ -15,7 +15,7 @@ EXPORTS NotifyUnmapViewOfSection ProcessInit ProcessTerm -; ResetToConsistentState + ResetToConsistentState ThreadInit ThreadTerm UpdateProcessorInformation From 95fc69b6288036ddaa18e6d021c0d77eb0fc8bbc Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 11 Jul 2024 12:24:55 +0100 Subject: [PATCH 06/13] ARM64EC: Handle SMC --- Source/Windows/ARM64EC/Module.cpp | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index fdc47a122..ee578f4ed 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -227,6 +227,22 @@ NTSTATUS ResetToConsistentState(EXCEPTION_POINTERS* Ptrs, ARM64_NT_CONTEXT* Cont return STATUS_SUCCESS; } + if (Exception->ExceptionCode == EXCEPTION_ACCESS_VIOLATION) { + const auto FaultAddress = static_cast(Exception->ExceptionInformation[1]); + + bool HandledRWX = false; + if (InvalidationTracker && GetCPUArea().ThreadState()) { + std::scoped_lock Lock(ThreadCreationMutex); + HandledRWX = InvalidationTracker->HandleRWXAccessViolation(FaultAddress); + } + + if (HandledRWX) { + LogMan::Msg::DFmt("Handled self-modifying code: pc: {:X} fault: {:X}", Context->Pc, FaultAddress); + *Continue = true; + return STATUS_SUCCESS; + } + } + if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context->Pc) && !IsDispatcherAddress(Context->Pc)) { return STATUS_SUCCESS; } From 839f9ecd3b74add62f18acf5a61aadce872000fe Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 11 Jul 2024 13:10:37 +0100 Subject: [PATCH 07/13] Windows: Add ARM64EC image structures --- Source/Windows/include/winnt.h | 128 
++++++++++++++++++++++++++++++ Source/Windows/include/winternl.h | 1 + 2 files changed, 129 insertions(+) create mode 100644 Source/Windows/include/winnt.h diff --git a/Source/Windows/include/winnt.h b/Source/Windows/include/winnt.h new file mode 100644 index 000000000..e644ed8f2 --- /dev/null +++ b/Source/Windows/include/winnt.h @@ -0,0 +1,128 @@ +// SPDX-License-Identifier: LGPL-2.1-or-later +// SPDX-FileCopyrightText: Copyright (C) the Wine project + +#pragma once + +#include_next + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct _IMAGE_LOAD_CONFIG_CODE_INTEGRITY { + WORD Flags; + WORD Catalog; + DWORD CatalogOffset; + DWORD Reserved; +} IMAGE_LOAD_CONFIG_CODE_INTEGRITY, *PIMAGE_LOAD_CONFIG_CODE_INTEGRITY; + +typedef struct __IMAGE_LOAD_CONFIG_DIRECTORY64 { + DWORD Size; /* 000 */ + DWORD TimeDateStamp; + WORD MajorVersion; + WORD MinorVersion; + DWORD GlobalFlagsClear; + DWORD GlobalFlagsSet; /* 010 */ + DWORD CriticalSectionDefaultTimeout; + ULONGLONG DeCommitFreeBlockThreshold; + ULONGLONG DeCommitTotalFreeThreshold; /* 020 */ + ULONGLONG LockPrefixTable; + ULONGLONG MaximumAllocationSize; /* 030 */ + ULONGLONG VirtualMemoryThreshold; + ULONGLONG ProcessAffinityMask; /* 040 */ + DWORD ProcessHeapFlags; + WORD CSDVersion; + WORD DependentLoadFlags; + ULONGLONG EditList; /* 050 */ + ULONGLONG SecurityCookie; + ULONGLONG SEHandlerTable; /* 060 */ + ULONGLONG SEHandlerCount; + ULONGLONG GuardCFCheckFunctionPointer; /* 070 */ + ULONGLONG GuardCFDispatchFunctionPointer; + ULONGLONG GuardCFFunctionTable; /* 080 */ + ULONGLONG GuardCFFunctionCount; + DWORD GuardFlags; /* 090 */ + IMAGE_LOAD_CONFIG_CODE_INTEGRITY CodeIntegrity; + ULONGLONG GuardAddressTakenIatEntryTable; /* 0a0 */ + ULONGLONG GuardAddressTakenIatEntryCount; + ULONGLONG GuardLongJumpTargetTable; /* 0b0 */ + ULONGLONG GuardLongJumpTargetCount; + ULONGLONG DynamicValueRelocTable; /* 0c0 */ + ULONGLONG CHPEMetadataPointer; + ULONGLONG GuardRFFailureRoutine; /* 0d0 */ + ULONGLONG 
GuardRFFailureRoutineFunctionPointer; + DWORD DynamicValueRelocTableOffset; /* 0e0 */ + WORD DynamicValueRelocTableSection; + WORD Reserved2; + ULONGLONG GuardRFVerifyStackPointerFunctionPointer; + DWORD HotPatchTableOffset; /* 0f0 */ + DWORD Reserved3; + ULONGLONG EnclaveConfigurationPointer; + ULONGLONG VolatileMetadataPointer; /* 100 */ + ULONGLONG GuardEHContinuationTable; + ULONGLONG GuardEHContinuationCount; /* 110 */ + ULONGLONG GuardXFGCheckFunctionPointer; + ULONGLONG GuardXFGDispatchFunctionPointer; /* 120 */ + ULONGLONG GuardXFGTableDispatchFunctionPointer; + ULONGLONG CastGuardOsDeterminedFailureMode; /* 130 */ + ULONGLONG GuardMemcpyFunctionPointer; +} _IMAGE_LOAD_CONFIG_DIRECTORY64, *_PIMAGE_LOAD_CONFIG_DIRECTORY64; + +typedef struct _IMAGE_CHPE_RANGE_ENTRY { + union { + ULONG StartOffset; + struct { + ULONG NativeCode : 1; + ULONG AddressBits : 31; + } DUMMYSTRUCTNAME; + } DUMMYUNIONNAME; + ULONG Length; +} IMAGE_CHPE_RANGE_ENTRY, *PIMAGE_CHPE_RANGE_ENTRY; + +typedef struct _IMAGE_ARM64EC_METADATA { + ULONG Version; + ULONG CodeMap; + ULONG CodeMapCount; + ULONG CodeRangesToEntryPoints; + ULONG RedirectionMetadata; + ULONG __os_arm64x_dispatch_call_no_redirect; + ULONG __os_arm64x_dispatch_ret; + ULONG __os_arm64x_dispatch_call; + ULONG __os_arm64x_dispatch_icall; + ULONG __os_arm64x_dispatch_icall_cfg; + ULONG AlternateEntryPoint; + ULONG AuxiliaryIAT; + ULONG CodeRangesToEntryPointsCount; + ULONG RedirectionMetadataCount; + ULONG GetX64InformationFunctionPointer; + ULONG SetX64InformationFunctionPointer; + ULONG ExtraRFETable; + ULONG ExtraRFETableSize; + ULONG __os_arm64x_dispatch_fptr; + ULONG AuxiliaryIATCopy; + ULONG __os_arm64x_helper0; + ULONG __os_arm64x_helper1; + ULONG __os_arm64x_helper2; + ULONG __os_arm64x_helper3; + ULONG __os_arm64x_helper4; + ULONG __os_arm64x_helper5; + ULONG __os_arm64x_helper6; + ULONG __os_arm64x_helper7; + ULONG __os_arm64x_helper8; +} IMAGE_ARM64EC_METADATA; + +typedef struct _IMAGE_ARM64EC_REDIRECTION_ENTRY { 
+ ULONG Source; + ULONG Destination; +} IMAGE_ARM64EC_REDIRECTION_ENTRY; + +typedef struct _IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT { + ULONG StartRva; + ULONG EndRva; + ULONG EntryPoint; +} IMAGE_ARM64EC_CODE_RANGE_ENTRY_POINT; + + +#ifdef __cplusplus +} +#endif diff --git a/Source/Windows/include/winternl.h b/Source/Windows/include/winternl.h index 365d9e0da..f34b1c1f0 100644 --- a/Source/Windows/include/winternl.h +++ b/Source/Windows/include/winternl.h @@ -108,6 +108,7 @@ void WINAPI Wow64ProcessPendingCrossProcessItems(void); NTSTATUS WINAPI RtlWow64SetThreadContext(HANDLE, const WOW64_CONTEXT*); NTSTATUS WINAPI RtlWow64GetThreadContext(HANDLE, WOW64_CONTEXT*); NTSTATUS WINAPI RtlWow64GetCurrentCpuArea(USHORT*, void**, void**); +NTSYSAPI PVOID WINAPI RtlImageDirectoryEntryToData(HMODULE, BOOL, WORD, ULONG*); NTSTATUS WINAPI NtSuspendThread(HANDLE, PULONG); NTSTATUS WINAPI NtGetContextThread(HANDLE, CONTEXT*); From 8e0fdfc325359d99c53e258eee6c501c1c3cf228 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 11 Jul 2024 13:17:11 +0100 Subject: [PATCH 08/13] ARM64EC: Add a helper to lookup the redirected address of an export FEX is unable to deal with reentrant compilation of any x64 hotpatches so they need to be ignored by bypassing FFSs and calling directly into the native target. 
--- Source/Windows/ARM64EC/Module.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index ee578f4ed..7e5c22673 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -41,6 +41,7 @@ $end_info$ #include #include #include +#include #include class ECSyscallHandler; @@ -109,6 +110,30 @@ bool IsDispatcherAddress(uint64_t Address) { const auto& Config = SignalDelegator->GetConfig(); return Address >= Config.DispatcherBegin && Address < Config.DispatcherEnd; } + +// GetProcAddress on ARM64EC returns a pointer to an x64 fast forward sequence to allow for redirecting to the JIT if functions are hotpatched. +// This looks up the procedure address of the native code even if the fast forward sequence has been patched. +uintptr_t GetRedirectedProcAddress(HMODULE Module, const char* ProcName) { + const uintptr_t Proc = reinterpret_cast(GetProcAddress(Module, ProcName)); + if (!Proc) { + return 0; + } + + ULONG Size; + const auto* LoadConfig = + reinterpret_cast<_IMAGE_LOAD_CONFIG_DIRECTORY64*>(RtlImageDirectoryEntryToData(Module, true, IMAGE_DIRECTORY_ENTRY_LOAD_CONFIG, &Size)); + const auto* CHPEMetadata = reinterpret_cast(LoadConfig->CHPEMetadataPointer); + const uintptr_t ModuleBase = reinterpret_cast(Module); + const uintptr_t ProcRVA = Proc - ModuleBase; + const auto* RedirectionTableBegin = reinterpret_cast(ModuleBase + CHPEMetadata->RedirectionMetadata); + const auto* RedirectionTableEnd = RedirectionTableBegin + CHPEMetadata->RedirectionMetadataCount; + const auto* It = + std::lower_bound(RedirectionTableBegin, RedirectionTableEnd, ProcRVA, [](const auto& Entry, uintptr_t RVA) { return Entry.Source < RVA; }); + if (It == RedirectionTableEnd || It->Source != ProcRVA) { // lower_bound returns End when every entry's Source is below ProcRVA; End must not be dereferenced. + return 0; + } + return ModuleBase + It->Destination; +} } // namespace namespace Exception { From f964a5187e9b0c9af2442c4258eb9c2bfc9f5a48 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Thu, 11 Jul
2024 13:40:38 +0100 Subject: [PATCH 09/13] ARM64EC: Implement BeginSimulation This is used by the kernel (or UNIX side of ntdll in wine) to jump into x86 code with the given context as is necessary when e.g. returning from an exception. --- Source/Windows/ARM64EC/Module.S | 9 +++++ Source/Windows/ARM64EC/Module.cpp | 59 +++++++++++++++++++++++++++++++ 2 files changed, 68 insertions(+) diff --git a/Source/Windows/ARM64EC/Module.S b/Source/Windows/ARM64EC/Module.S index b7d8516fa..1ed2bd599 100644 --- a/Source/Windows/ARM64EC/Module.S +++ b/Source/Windows/ARM64EC/Module.S @@ -37,6 +37,15 @@ enter_jit: ldr x16, [x17, #0x40] // ChpeV2CpuAreaInfo->EmulatorData[2] - DispatcherLoopTopEnterEC br x16 // DispatcherLoopTopEnterEC(RIP:x9, CPUArea:x17) + // Invoked by KiUserEmulationDispatcher after e.g. an NtContinue to x86 code + // Expects a CONTEXT pointer in x0 +.global BeginSimulation +BeginSimulation: + bl "#SyncThreadContext" + ldr x17, [x18, #0x1788] // TEB->ChpeV2CpuAreaInfo + ldr x16, [x17, #0x48] // ChpeV2CpuAreaInfo->EmulatorData[3] - DispatcherLoopTopEnterECFillSRA + br x16 // DispatcherLoopTopEnterECFillSRA(CPUArea:x17) + // Called into by FEXCore // Expects the target code address in x9 .global ExitFunctionEC diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index 7e5c22673..02e8d08bd 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -153,6 +153,60 @@ static bool HandleUnalignedAccess(ARM64_NT_CONTEXT& Context) { Context.Pc += Result.second; return true; } + +static void LoadStateFromECContext(FEXCore::Core::InternalThreadState* Thread, CONTEXT& Context) { + auto& State = Thread->CurrentFrame->State; + + // General register state + State.gregs[FEXCore::X86State::REG_RAX] = Context.Rax; + State.gregs[FEXCore::X86State::REG_RCX] = Context.Rcx; + State.gregs[FEXCore::X86State::REG_RDX] = Context.Rdx; + State.gregs[FEXCore::X86State::REG_RBX] = Context.Rbx; + 
State.gregs[FEXCore::X86State::REG_RSP] = Context.Rsp; + State.gregs[FEXCore::X86State::REG_RBP] = Context.Rbp; + State.gregs[FEXCore::X86State::REG_RSI] = Context.Rsi; + State.gregs[FEXCore::X86State::REG_RDI] = Context.Rdi; + State.gregs[FEXCore::X86State::REG_R8] = Context.R8; + State.gregs[FEXCore::X86State::REG_R9] = Context.R9; + State.gregs[FEXCore::X86State::REG_R10] = Context.R10; + State.gregs[FEXCore::X86State::REG_R11] = Context.R11; + State.gregs[FEXCore::X86State::REG_R12] = Context.R12; + State.gregs[FEXCore::X86State::REG_R13] = Context.R13; + State.gregs[FEXCore::X86State::REG_R14] = Context.R14; + State.gregs[FEXCore::X86State::REG_R15] = Context.R15; + + State.rip = Context.Rip; + CTX->SetFlagsFromCompactedEFLAGS(Thread, Context.EFlags); + + State.es_idx = Context.SegEs & 0xffff; + State.cs_idx = Context.SegCs & 0xffff; + State.ss_idx = Context.SegSs & 0xffff; + State.ds_idx = Context.SegDs & 0xffff; + State.fs_idx = Context.SegFs & 0xffff; + State.gs_idx = Context.SegGs & 0xffff; + + // The TEB is the only populated GDT entry by default + const auto TEB = reinterpret_cast(NtCurrentTeb()); + State.gdt[(Context.SegGs & 0xffff) >> 3].base = TEB; + State.gs_cached = TEB; + State.fs_cached = 0; + State.es_cached = 0; + State.cs_cached = 0; + State.ss_cached = 0; + State.ds_cached = 0; + + // Floating-point register state + CTX->SetXMMRegistersFromState(Thread, reinterpret_cast(Context.FltSave.XmmRegisters), nullptr); + memcpy(State.mm, Context.FltSave.FloatRegisters, sizeof(State.mm)); + + State.FCW = Context.FltSave.ControlWord; + State.flags[FEXCore::X86State::X87FLAG_C0_LOC] = (Context.FltSave.StatusWord >> 8) & 1; + State.flags[FEXCore::X86State::X87FLAG_C1_LOC] = (Context.FltSave.StatusWord >> 9) & 1; + State.flags[FEXCore::X86State::X87FLAG_C2_LOC] = (Context.FltSave.StatusWord >> 10) & 1; + State.flags[FEXCore::X86State::X87FLAG_C3_LOC] = (Context.FltSave.StatusWord >> 14) & 1; + State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = 
(Context.FltSave.StatusWord >> 11) & 0b111; + State.AbridgedFTW = Context.FltSave.TagWord; +} } // namespace Exception namespace Logging { @@ -195,6 +249,11 @@ public: } }; +extern "C" void SyncThreadContext(CONTEXT* Context) { + auto* Thread = GetCPUArea().ThreadState(); + Exception::LoadStateFromECContext(Thread, *Context); +} + void ProcessInit() { Logging::Init(); FEX::Config::InitializeConfigs(); From 3c19e634b36eb2ebe9f4bb5d894c157b9c083517 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Fri, 12 Jul 2024 00:23:27 +0100 Subject: [PATCH 10/13] ARM64EC: Rethrow exceptions from within the JIT As the exception dispatcher is initially invoked on the emulator stack, control needs to be transferred to the dispatcher on the guest stack after recovering the x86 RSP to allow for invoking x86 exception handlers. --- Source/Windows/ARM64EC/Module.cpp | 113 ++++++++++++++++++++++++++++-- 1 file changed, 109 insertions(+), 4 deletions(-) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index 02e8d08bd..e10b9adc8 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -111,8 +111,8 @@ bool IsDispatcherAddress(uint64_t Address) { return Address >= Config.DispatcherBegin && Address < Config.DispatcherEnd; } -// GetProcAddress on ARM64EC returns a pointer to an x64 fast forward sequence to allow for redirecting to the JIT if functions are hotpatched. -// This looks up the procedure address of the native code even if the fast forward sequence has been patched. +// GetProcAddress on ARM64EC returns a pointer to an x64 fast forward sequence to allow for redirecting to the JIT if functions are +// hotpatched. This looks up the procedure address of the native code even if the fast forward sequence has been patched. 
uintptr_t GetRedirectedProcAddress(HMODULE Module, const char* ProcName) { const uintptr_t Proc = reinterpret_cast(GetProcAddress(Module, ProcName)); if (!Proc) { @@ -138,6 +138,7 @@ uintptr_t GetRedirectedProcAddress(HMODULE Module, const char* ProcName) { namespace Exception { static std::optional HandlerConfig; +static uintptr_t KiUserExceptionDispatcher; static bool HandleUnalignedAccess(ARM64_NT_CONTEXT& Context) { if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context.Pc)) { @@ -207,6 +208,101 @@ static void LoadStateFromECContext(FEXCore::Core::InternalThreadState* Thread, C State.flags[FEXCore::X86State::X87FLAG_TOP_LOC] = (Context.FltSave.StatusWord >> 11) & 0b111; State.AbridgedFTW = Context.FltSave.TagWord; } + +static void ReconstructThreadState(ARM64_NT_CONTEXT& Context) { + const auto& Config = SignalDelegator->GetConfig(); + auto* Thread = GetCPUArea().ThreadState(); + auto& State = Thread->CurrentFrame->State; + + State.rip = CTX->RestoreRIPFromHostPC(Thread, Context.Pc); + + // Spill all SRA GPRs + for (size_t i = 0; i < Config.SRAGPRCount; i++) { + State.gregs[i] = Context.X[Config.SRAGPRMapping[i]]; + } + + // Spill all SRA FPRs + for (size_t i = 0; i < Config.SRAFPRCount; i++) { + memcpy(State.xmm.sse.data[i], &Context.V[Config.SRAFPRMapping[i]], sizeof(__uint128_t)); + } +} + +// Reconstructs an x64 context from the input context within the JIT, packed into a regular ARM64 context following the ARM64EC register mapping +static ARM64_NT_CONTEXT ReconstructPackedECContext(ARM64_NT_CONTEXT& Context) { + ReconstructThreadState(Context); + ARM64_NT_CONTEXT ECContext {}; + + ECContext.ContextFlags = CONTEXT_ARM64_CONTROL | CONTEXT_ARM64_INTEGER | CONTEXT_ARM64_FLOATING_POINT; + + auto* Thread = GetCPUArea().ThreadState(); + auto& State = Thread->CurrentFrame->State; + + ECContext.X8 = State.gregs[FEXCore::X86State::REG_RAX]; + ECContext.X0 = State.gregs[FEXCore::X86State::REG_RCX]; + ECContext.X1 = 
State.gregs[FEXCore::X86State::REG_RDX]; + ECContext.X27 = State.gregs[FEXCore::X86State::REG_RBX]; + ECContext.Sp = State.gregs[FEXCore::X86State::REG_RSP]; + ECContext.Fp = State.gregs[FEXCore::X86State::REG_RBP]; + ECContext.X25 = State.gregs[FEXCore::X86State::REG_RSI]; + ECContext.X26 = State.gregs[FEXCore::X86State::REG_RDI]; + ECContext.X2 = State.gregs[FEXCore::X86State::REG_R8]; + ECContext.X3 = State.gregs[FEXCore::X86State::REG_R9]; + ECContext.X4 = State.gregs[FEXCore::X86State::REG_R10]; + ECContext.X5 = State.gregs[FEXCore::X86State::REG_R11]; + ECContext.X19 = State.gregs[FEXCore::X86State::REG_R12]; + ECContext.X20 = State.gregs[FEXCore::X86State::REG_R13]; + ECContext.X21 = State.gregs[FEXCore::X86State::REG_R14]; + ECContext.X22 = State.gregs[FEXCore::X86State::REG_R15]; + + ECContext.Pc = State.rip; + + CTX->ReconstructXMMRegisters(Thread, reinterpret_cast<__uint128_t*>(&ECContext.V[0]), nullptr); + + ECContext.Lr = State.mm[0][0]; + ECContext.X6 = State.mm[1][0]; + ECContext.X7 = State.mm[2][0]; + ECContext.X9 = State.mm[3][0]; + ECContext.X16 = (State.mm[3][1] & 0xffff) << 48 | (State.mm[2][1] & 0xffff) << 32 | (State.mm[1][1] & 0xffff) << 16 | (State.mm[0][1] & 0xffff); + ECContext.X10 = State.mm[4][0]; + ECContext.X11 = State.mm[5][0]; + ECContext.X12 = State.mm[6][0]; + ECContext.X15 = State.mm[7][0]; + ECContext.X17 = (State.mm[7][1] & 0xffff) << 48 | (State.mm[6][1] & 0xffff) << 32 | (State.mm[5][1] & 0xffff) << 16 | (State.mm[4][1] & 0xffff); + + // Zero all disallowed registers + ECContext.X13 = 0; + ECContext.X14 = 0; + ECContext.X18 = 0; + ECContext.X23 = 0; + ECContext.X24 = 0; + ECContext.X28 = 0; + + // NZCV will be converted into EFlags by ntdll, the rest are lost during exception handling. 
+ // See HandleGuestException + ECContext.Cpsr = Context.Cpsr; + ECContext.Fpcr = Context.Fpcr; + ECContext.Fpsr = Context.Fpsr; + + return ECContext; +} + +static void RethrowGuestException(const EXCEPTION_RECORD& Rec, ARM64_NT_CONTEXT& Context) { + const auto& Config = SignalDelegator->GetConfig(); + uint64_t GuestSp = Context.X[Config.SRAGPRMapping[static_cast(FEXCore::X86State::REG_RSP)]]; + struct DispatchArgs { + ARM64_NT_CONTEXT Context; + EXCEPTION_RECORD Rec; + uint64_t Align; + uint64_t Redzone[2]; + }* Args = reinterpret_cast(FEXCore::AlignDown(GuestSp, 64)) - 1; + + LogMan::Msg::DFmt("Reconstructing context"); + Args->Context = ReconstructPackedECContext(Context); + LogMan::Msg::DFmt("pc: {:X} rip: {:X}", Context.Pc, Args->Context.Pc); + Args->Rec = *Ptrs->ExceptionRecord; + Context.Sp = reinterpret_cast(Args); + Context.Pc = KiUserExceptionDispatcher; +} } // namespace Exception namespace Logging { @@ -283,6 +379,8 @@ void ProcessInit() { X64ReturnInstr = ::VirtualAlloc(nullptr, FEXCore::Utils::FEX_PAGE_SIZE, MEM_COMMIT, PAGE_EXECUTE_READWRITE); *reinterpret_cast(X64ReturnInstr) = 0xc3; + + Exception::KiUserExceptionDispatcher = GetRedirectedProcAddress(GetModuleHandle("ntdll.dll"), "KiUserExceptionDispatcher"); } void ProcessTerm() {} @@ -331,9 +429,16 @@ NTSTATUS ResetToConsistentState(EXCEPTION_POINTERS* Ptrs, ARM64_NT_CONTEXT* Cont return STATUS_SUCCESS; } - LogMan::Msg::EFmt("Exception rethrow is unimplemented"); - return STATUS_SUCCESS; + if (IsEmulatorStackAddress(reinterpret_cast(__builtin_frame_address(0)))) { + Exception::RethrowGuestException(*Exception, *Context); + LogMan::Msg::DFmt("Rethrowing onto guest stack: {:X}", Context->Sp); + *Continue = true; + return STATUS_SUCCESS; + } else { + LogMan::Msg::EFmt("Unexpected exception in JIT code on guest stack"); + return STATUS_SUCCESS; + } } void NotifyMemoryAlloc(void* Address, SIZE_T Size, ULONG Type, ULONG Prot) { From af3145674ef22f6065d61adccf88c6e7f88fae44 Mon Sep 17 00:00:00 2001 From: 
Billy Laws Date: Fri, 12 Jul 2024 00:58:17 +0100 Subject: [PATCH 11/13] ARM64EC: Fixup exception information for faulting x86 instructions FEX emulates faulting instructions (e.g. ud2 or int 2d) by jumping to the dispatcher and filling out a structure with fault details in the thread context. Parse this out into a windows exception record structure so the correct fault information can be seen by the guest. --- Source/Windows/ARM64EC/Module.cpp | 71 ++++++++++++++++++++++++++++++- 1 file changed, 70 insertions(+), 1 deletion(-) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index e10b9adc8..ea835ada9 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -140,6 +140,75 @@ namespace Exception { static std::optional HandlerConfig; static uintptr_t KiUserExceptionDispatcher; +static EXCEPTION_RECORD HandleGuestException(const EXCEPTION_RECORD& Src, ARM64_NT_CONTEXT& Context) { + auto* Thread = GetCPUArea().ThreadState(); + auto& Fault = Thread->CurrentFrame->SynchronousFaultData; + EXCEPTION_RECORD Dst = Src; + Dst.ExceptionAddress = reinterpret_cast(Context.Pc); + + // Windows always clears TF, DF and AF when handling an exception, restoring after. + // TODO: Check windows behaviour for the restoring after, quite awkward to achieve with the BT API. Would need to fixup flags after a + // rethrow and keep track of context pointers on the stack so if a SEH handler changes flags they can be restored in BeginContext after + // the NtContinue syscall (which will convert to an ARM64 context and back, losing these flags). 
+ uint32_t EFlags = CTX->ReconstructCompactedEFLAGS(Thread, true, Context.X, Context.Cpsr); + EFlags &= ~((1 << FEXCore::X86State::RFLAG_DF_RAW_LOC) | (1 << FEXCore::X86State::RFLAG_TF_LOC) | (1 << FEXCore::X86State::RFLAG_AF_RAW_LOC)); + CTX->SetFlagsFromCompactedEFLAGS(Thread, EFlags); + + if (!Fault.FaultToTopAndGeneratedException) { + return Dst; + } + Fault.FaultToTopAndGeneratedException = false; + + Dst.ExceptionFlags = 0; + Dst.NumberParameters = 0; + + switch (Fault.Signal) { + case FEXCore::Core::FAULT_SIGILL: Dst.ExceptionCode = EXCEPTION_ILLEGAL_INSTRUCTION; return Dst; + case FEXCore::Core::FAULT_SIGTRAP: + switch (Fault.TrapNo) { + case FEXCore::X86State::X86_TRAPNO_DB: Dst.ExceptionCode = EXCEPTION_SINGLE_STEP; return Dst; + case FEXCore::X86State::X86_TRAPNO_BP: + Context.Pc -= 1; + Dst.ExceptionAddress = reinterpret_cast(Context.Pc); + Dst.ExceptionCode = EXCEPTION_BREAKPOINT; + Dst.NumberParameters = 1; + Dst.ExceptionInformation[0] = 0; + return Dst; + default: LogMan::Msg::EFmt("Unknown SIGTRAP trap: {}", Fault.TrapNo); break; + } + break; + case FEXCore::Core::FAULT_SIGSEGV: + switch (Fault.TrapNo) { + case FEXCore::X86State::X86_TRAPNO_GP: + if ((Fault.err_code & 0b111) == 0b010) { + switch (Fault.err_code >> 3) { + case 0x2d: + Context.Pc += 2; + Dst.ExceptionCode = EXCEPTION_BREAKPOINT; + Dst.ExceptionAddress = reinterpret_cast(Context.Pc + 1); + Dst.NumberParameters = 1; + Dst.ExceptionInformation[0] = Context.X8; // RAX + // Note that ExceptionAddress doesn't equal the reported context RIP here, this discrepancy is expected and not having it can trigger anti-debug logic. 
+ return Dst; + default: LogMan::Msg::EFmt("Unknown interrupt: 0x{:X}", Fault.err_code >> 3); break; + } + } else { + Dst.ExceptionCode = EXCEPTION_PRIV_INSTRUCTION; + return Dst; + } + break; + case FEXCore::X86State::X86_TRAPNO_OF: Dst.ExceptionCode = EXCEPTION_INT_OVERFLOW; return Dst; + default: LogMan::Msg::EFmt("Unknown SIGSEGV trap: {}", Fault.TrapNo); break; + } + break; + default: LogMan::Msg::EFmt("Unknown signal type: {}", Fault.Signal); break; + } + + // Default to SIGILL + Dst.ExceptionCode = EXCEPTION_ILLEGAL_INSTRUCTION; + return Dst; +} + static bool HandleUnalignedAccess(ARM64_NT_CONTEXT& Context) { if (!CTX->IsAddressInCodeBuffer(GetCPUArea().ThreadState(), Context.Pc)) { return false; @@ -299,7 +368,7 @@ static void RethrowGuestException(const EXCEPTION_RECORD& Rec, ARM64_NT_CONTEXT& LogMan::Msg::DFmt("Reconstructing context"); Args->Context = ReconstructPackedECContext(Context); LogMan::Msg::DFmt("pc: {:X} rip: {:X}", Context.Pc, Args->Context.Pc); - Args->Rec = *Ptrs->ExceptionRecord; + Args->Rec = HandleGuestException(Rec, Args->Context); Context.Sp = reinterpret_cast(Args); Context.Pc = KiUserExceptionDispatcher; } From dba0a1d09ea688c436ad1e2ee103cc59ca2981d0 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Fri, 12 Jul 2024 17:31:45 +0100 Subject: [PATCH 12/13] ARM64EC: Initialize x86 control registers on thread start --- Source/Windows/ARM64EC/Module.cpp | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Source/Windows/ARM64EC/Module.cpp b/Source/Windows/ARM64EC/Module.cpp index ea835ada9..39d2da8a7 100644 --- a/Source/Windows/ARM64EC/Module.cpp +++ b/Source/Windows/ARM64EC/Module.cpp @@ -63,6 +63,10 @@ struct ThreadCPUArea { return Area->EmulatorStackBase; } + ARM64EC_NT_CONTEXT& ContextAmd64() const { + return *Area->ContextAmd64; + } + FEXCore::Core::CpuStateFrame*& StateFrame() const { return reinterpret_cast(Area->EmulatorData[0]); } @@ -572,6 +576,19 @@ NTSTATUS ThreadInit() { uint64_t EnterECFillSRA = 
Thread->CurrentFrame->Pointers.Common.DispatcherLoopTopEnterECFillSRA; CPUArea.DispatcherLoopTopEnterECFillSRA() = EnterECFillSRA; + CPUArea.ContextAmd64() = {.ContextFlags = CONTEXT_CONTROL | CONTEXT_INTEGER | CONTEXT_FLOATING_POINT, + .AMD64_SegCs = 0x33, + .AMD64_SegDs = 0x2b, + .AMD64_SegEs = 0x2b, + .AMD64_SegFs = 0x53, + .AMD64_SegGs = 0x2b, + .AMD64_SegSs = 0x2b, + .AMD64_EFlags = 0x202, + .AMD64_MxCsr = 0x1f80, + .AMD64_MxCsr_copy = 0x1f80, + .AMD64_ControlWord = 0x27f}; + Exception::LoadStateFromECContext(Thread, CPUArea.ContextAmd64().AMD64_Context); + { std::scoped_lock Lock(ThreadCreationMutex); Threads.emplace(GetCurrentThreadId(), Thread); From f6f8d26426ccbc02a1202baac8c0e8d0e0bcc295 Mon Sep 17 00:00:00 2001 From: Billy Laws Date: Fri, 12 Jul 2024 19:24:13 +0000 Subject: [PATCH 13/13] Update jemalloc submodule --- External/jemalloc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/External/jemalloc b/External/jemalloc index 569545241..f3149a8c3 160000 --- a/External/jemalloc +++ b/External/jemalloc @@ -1 +1 @@ -Subproject commit 569545241370457e2d14b0458b0ae9261491ea83 +Subproject commit f3149a8c3b6aaa523befdd09e34064fbad949768