Mirror of https://github.com/FEX-Emu/FEX.git (synced 2025-02-24 08:42:31 +00:00)
Merge pull request #3171 from Sonicadvance1/merge_dispatcher
FEXCore: Merge Arm64Dispatcher in to Dispatcher
Commit 935b3a313a
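For orientation before the diff: a simplified before/after sketch of the class structure this merge changes, reconstructed from the hunks below (member lists abridged). The separate Arm64Dispatcher subclass is removed and Dispatcher itself becomes the concrete ARM64 implementation, so callers switch from Dispatcher::CreateArm64() to Dispatcher::Create().

  // Before this commit (abridged):
  class Dispatcher {                                 // Dispatcher.h (old)
  public:
    virtual ~Dispatcher() = default;
    virtual void InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) = 0;
    virtual size_t GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP) = 0;
    static fextl::unique_ptr<Dispatcher> CreateArm64(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config);
  };
  // Arm64Dispatcher.h (deleted by this commit): the ARM64-specific implementation.
  class Arm64Dispatcher final : public Dispatcher, public Arm64Emitter { /* emits the dispatcher code */ };

  // After this commit (abridged):
  class Dispatcher final : public Arm64Emitter {     // Dispatcher.h (new)
  public:
    static fextl::unique_ptr<Dispatcher> Create(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config);
    Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &Config);
    void InitThreadPointers(FEXCore::Core::InternalThreadState *Thread);
    size_t GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP);
  };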
@@ -107,7 +107,6 @@ set (SRCS
   Interface/Core/X86HelperGen.cpp
   Interface/Core/ArchHelpers/Arm64Emitter.cpp
   Interface/Core/Dispatcher/Dispatcher.cpp
-  Interface/Core/Dispatcher/Arm64Dispatcher.cpp
   Interface/Core/Interpreter/Fallbacks/InterpreterFallbacks.cpp
   Interface/Core/JIT/Arm64/JIT.cpp
   Interface/Core/JIT/Arm64/ALUOps.cpp
@@ -5,7 +5,6 @@
 #include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
 #include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"

-#include "Interface/Core/Dispatcher/Dispatcher.h"
 #include "Interface/Core/ObjectCache/Relocations.h"

 #include <aarch64/assembler-aarch64.h>
@@ -29,6 +28,10 @@
 #include <utility>
 #include <span>

+namespace FEXCore::Context {
+class ContextImpl;
+}
+
 namespace FEXCore::CPU {
 // Contains the address to the currently available CPU state
 constexpr auto STATE = FEXCore::ARMEmitter::XReg::x28;
@@ -319,7 +319,7 @@ namespace FEXCore::Context {
   }

   DispatcherConfig.StaticRegisterAllocation = Config.StaticRegisterAllocation && BackendFeatures.SupportsStaticRegisterAllocation;
-  Dispatcher = FEXCore::CPU::Dispatcher::CreateArm64(this, DispatcherConfig);
+  Dispatcher = FEXCore::CPU::Dispatcher::Create(this, DispatcherConfig);

   // Set up the SignalDelegator config since core is initialized.
   FEXCore::SignalDelegator::SignalDelegatorConfig SignalConfig {
@@ -1,584 +0,0 @@
(Interface/Core/Dispatcher/Arm64Dispatcher.cpp is deleted. Its includes and definitions follow; the function bodies elided here are identical to the code added to Dispatcher.cpp further down in this diff, with the Arm64Dispatcher:: prefix replaced by Dispatcher::.)
-// SPDX-License-Identifier: MIT
-#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
-#include "Interface/Core/LookupCache.h"
-
-#include "Interface/Core/Dispatcher/Arm64Dispatcher.h"
-
-#include "Interface/Context/Context.h"
-#include "Interface/Core/X86HelperGen.h"
-
-#include <FEXCore/Core/CPUBackend.h>
-#include <FEXCore/Core/CoreState.h>
-#include <FEXCore/Core/X86Enums.h>
-#include <FEXCore/Debug/InternalThreadState.h>
-#include <FEXCore/fextl/memory.h>
-#include <FEXHeaderUtils/Syscalls.h>
-
-#include <array>
-#include <bit>
-#include <cmath>
-#include <cstddef>
-#include <cstdint>
-#include <memory>
-
-#include <aarch64/assembler-aarch64.h>
-#include <aarch64/constants-aarch64.h>
-#include <aarch64/cpu-aarch64.h>
-#include <aarch64/operands-aarch64.h>
-#include <code-buffer-vixl.h>
-#include <platform-vixl.h>
-
-#include <unistd.h>
-
-namespace FEXCore::CPU {
-
-constexpr size_t MAX_DISPATCHER_CODE_SIZE = 4096;
-
-Arm64Dispatcher::Arm64Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &config)
-  : FEXCore::CPU::Dispatcher(ctx, config), Arm64Emitter(ctx, MAX_DISPATCHER_CODE_SIZE) {
-  EmitDispatcher();
-}
-
-void Arm64Dispatcher::EmitDispatcher() { /* body moved verbatim to Dispatcher::EmitDispatcher below */ }
-
-#ifdef VIXL_SIMULATOR
-void Arm64Dispatcher::ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame) { /* moved below */ }
-void Arm64Dispatcher::ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP) { /* moved below */ }
-#endif
-
-size_t Arm64Dispatcher::GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP) { /* moved below */ }
-
-void Arm64Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) { /* moved below */ }
-
-fextl::unique_ptr<Dispatcher> Dispatcher::CreateArm64(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config) {
-  return fextl::make_unique<Arm64Dispatcher>(CTX, Config);
-}
-
-}
@@ -1,61 +0,0 @@
(Interface/Core/Dispatcher/Arm64Dispatcher.h is deleted.)
-// SPDX-License-Identifier: MIT
-#pragma once
-
-#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
-#include "Interface/Core/Dispatcher/Dispatcher.h"
-
-#ifdef VIXL_SIMULATOR
-#include <aarch64/simulator-aarch64.h>
-#endif
-
-namespace FEXCore::Core {
-struct InternalThreadState;
-}
-
-#define STATE_PTR(STATE_TYPE, FIELD) \
-  STATE.R(), offsetof(FEXCore::Core::STATE_TYPE, FIELD)
-
-namespace FEXCore::CPU {
-
-class Arm64Dispatcher final : public Dispatcher, public Arm64Emitter {
-public:
-  Arm64Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &config);
-  void InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) override;
-  size_t GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP) override;
-
-#ifdef VIXL_SIMULATOR
-  void ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame) override;
-  void ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP) override;
-#endif
-
-  void EmitDispatcher();
-
-  uint16_t GetSRAGPRCount() const override {
-    return StaticRegisters.size();
-  }
-
-  uint16_t GetSRAFPRCount() const override {
-    return StaticFPRegisters.size();
-  }
-
-  void GetSRAGPRMapping(uint8_t Mapping[16]) const override {
-    for (size_t i = 0; i < StaticRegisters.size(); ++i) {
-      Mapping[i] = StaticRegisters[i].Idx();
-    }
-  }
-
-  void GetSRAFPRMapping(uint8_t Mapping[16]) const override {
-    for (size_t i = 0; i < StaticFPRegisters.size(); ++i) {
-      Mapping[i] = StaticFPRegisters[i].Idx();
-    }
-  }
-
-private:
-  // Long division helpers
-  uint64_t LUDIVHandlerAddress{};
-  uint64_t LDIVHandlerAddress{};
-  uint64_t LUREMHandlerAddress{};
-  uint64_t LREMHandlerAddress{};
-};
-
-}
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: MIT
 #include "Interface/Context/Context.h"
+#include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
 #include "Interface/Core/Dispatcher/Dispatcher.h"
+#include "Interface/Core/LookupCache.h"
 #include "Interface/Core/X86HelperGen.h"

 #include <FEXCore/Config/Config.h>
@@ -51,4 +53,555 @@ uint64_t Dispatcher::GetCompileBlockPtr() {
   return CompileBlockPtr.Data;
 }

+constexpr size_t MAX_DISPATCHER_CODE_SIZE = 4096;
+
+Dispatcher::Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &config)
+  : Arm64Emitter(ctx, MAX_DISPATCHER_CODE_SIZE)
+  , CTX {ctx}
+  , config {config} {
+  EmitDispatcher();
+}
+
+void Dispatcher::EmitDispatcher() {
+#ifdef VIXL_DISASSEMBLER
+  const auto DisasmBegin = GetCursorAddress<const vixl::aarch64::Instruction*>();
+#endif
+
+  DispatchPtr = GetCursorAddress<AsmDispatch>();
+
+  // while (true) {
+  //    Ptr = FindBlock(RIP)
+  //    if (!Ptr)
+  //      Ptr = CTX->CompileBlock(RIP);
+  //
+  //    Ptr();
+  // }
+
+  ARMEmitter::ForwardLabel l_CTX;
+  ARMEmitter::ForwardLabel l_Sleep;
+  ARMEmitter::ForwardLabel l_CompileBlock;
+
+  // Push all the register we need to save
+  PushCalleeSavedRegisters();
+
+  // Push our memory base to the correct register
+  // Move our thread pointer to the correct register
+  // This is passed in to parameter 0 (x0)
+  mov(STATE, ARMEmitter::XReg::x0);
+
+  // Save this stack pointer so we can cleanly shutdown the emulation with a long jump
+  // regardless of where we were in the stack
+  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, ARMEmitter::Reg::rsp, 0);
+  str(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, ReturningStackLocation));
+
+  AbsoluteLoopTopAddressFillSRA = GetCursorAddress<uint64_t>();
+
+  if (config.StaticRegisterAllocation) {
+    FillStaticRegs();
+  }
+
+  // We want to ensure that we are 16 byte aligned at the top of this loop
+  Align16B();
+  ARMEmitter::BiDirectionalLabel FullLookup{};
+  ARMEmitter::BiDirectionalLabel CallBlock{};
+  ARMEmitter::BackwardLabel LoopTop{};
+
+  Bind(&LoopTop);
+  AbsoluteLoopTopAddress = GetCursorAddress<uint64_t>();
+
+  // Load in our RIP
+  // Don't modify x2 since it contains our RIP once the block doesn't exist
+  auto RipReg = ARMEmitter::XReg::x2;
+  ldr(RipReg, STATE_PTR(CpuStateFrame, State.rip));
+
+  // L1 Cache
+  ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, Pointers.Common.L1Pointer));
+
+  and_(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r3, RipReg.R(), LookupCache::L1_ENTRIES_MASK);
+  add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, ARMEmitter::Reg::r0, ARMEmitter::Reg::r3, ARMEmitter::ShiftType::LSL, 4);
+  ldp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::XReg::x3, ARMEmitter::XReg::x0, ARMEmitter::Reg::r0, 0);
+  cmp(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, RipReg.R());
+  b(ARMEmitter::Condition::CC_NE, &FullLookup);
+
+  br(ARMEmitter::Reg::r3);
+
+  // L1C check failed, do a full lookup
+  Bind(&FullLookup);
+
+  // This is the block cache lookup routine
+  // It matches what is going on it LookupCache.h::FindBlock
+  ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, Pointers.Common.L2Pointer));
+
+  // Mask the address by the virtual address size so we can check for aliases
+  uint64_t VirtualMemorySize = CTX->Config.VirtualMemSize;
+  if (std::popcount(VirtualMemorySize) == 1) {
+    and_(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r3, RipReg.R(), VirtualMemorySize - 1);
+  }
+  else {
+    LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r3, VirtualMemorySize);
+    and_(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r3, RipReg.R(), ARMEmitter::Reg::r3);
+  }
+
+  ARMEmitter::ForwardLabel NoBlock;
+
+  {
+    // Offset the address and add to our page pointer
+    lsr(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, ARMEmitter::Reg::r3, 12);
+
+    // Load the pointer from the offset
+    ldr(ARMEmitter::XReg::x0, ARMEmitter::Reg::r0, ARMEmitter::Reg::r1, ARMEmitter::ExtendedType::LSL_64, 3);
+
+    // If page pointer is zero then we have no block
+    cbz(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, &NoBlock);
+
+    // Steal the page offset
+    and_(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, ARMEmitter::Reg::r3, 0x0FFF);
+
+    // Shift the offset by the size of the block cache entry
+    add(ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, ARMEmitter::XReg::x1, ARMEmitter::ShiftType::LSL, (int)log2(sizeof(FEXCore::LookupCache::LookupCacheEntry)));
+
+    // The the full LookupCacheEntry with a single LDP.
+    // Check the guest address first to ensure it maps to the address we are currently at.
+    // This fixes aliasing problems
+    ldp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::XReg::x3, ARMEmitter::XReg::x1, ARMEmitter::Reg::r0, 0);
+
+    // If the guest address doesn't match, Compile the block.
+    cmp(ARMEmitter::XReg::x1, RipReg);
+    b(ARMEmitter::Condition::CC_NE, &NoBlock);
+
+    // Check the host address to see if it matches, else compile the block.
+    cbz(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r3, &NoBlock);
+
+    // If we've made it here then we have a real compiled block
+    {
+      // update L1 cache
+      ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, Pointers.Common.L1Pointer));
+
+      and_(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, RipReg.R(), LookupCache::L1_ENTRIES_MASK);
+      add(ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, ARMEmitter::XReg::x1, ARMEmitter::ShiftType::LSL, 4);
+      stp<ARMEmitter::IndexType::OFFSET>(ARMEmitter::XReg::x3, ARMEmitter::XReg::x2, ARMEmitter::Reg::r0);
+
+      // Jump to the block
+      br(ARMEmitter::Reg::r3);
+    }
+  }
+
+  {
+    ThreadStopHandlerAddressSpillSRA = GetCursorAddress<uint64_t>();
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    ThreadStopHandlerAddress = GetCursorAddress<uint64_t>();
+
+    PopCalleeSavedRegisters();
+
+    // Return from the function
+    // LR is set to the correct return location now
+    ret();
+  }
+
+  {
+    ExitFunctionLinkerAddress = GetCursorAddress<uint64_t>();
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+    add(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, 1);
+    str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+
+    mov(ARMEmitter::XReg::x0, STATE);
+    mov(ARMEmitter::XReg::x1, ARMEmitter::XReg::lr);
+
+    ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, Pointers.Common.ExitFunctionLink));
+#ifdef VIXL_SIMULATOR
+    GenerateIndirectRuntimeCall<uintptr_t, void *, void *>(ARMEmitter::Reg::r2);
+#else
+    blr(ARMEmitter::Reg::r2);
+#endif
+
+    if (config.StaticRegisterAllocation)
+      FillStaticRegs();
+
+    ldr(ARMEmitter::XReg::x1, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+    subs(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x1, ARMEmitter::XReg::x1, 1);
+    str(ARMEmitter::XReg::x1, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+
+    // Trigger segfault if any deferred signals are pending
+    ldr(ARMEmitter::XReg::x1, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalFaultAddress));
+    str(ARMEmitter::XReg::zr, ARMEmitter::XReg::x1, 0);
+
+    br(ARMEmitter::Reg::r0);
+  }
+
+  // Need to create the block
+  {
+    Bind(&NoBlock);
+
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+    add(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, 1);
+    str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+
+    ldr(ARMEmitter::XReg::x0, &l_CTX);
+    mov(ARMEmitter::XReg::x1, STATE);
+    ldr(ARMEmitter::XReg::x3, &l_CompileBlock);
+
+    // X2 contains our guest RIP
+#ifdef VIXL_SIMULATOR
+    GenerateIndirectRuntimeCall<void, void *, uint64_t, void *>(ARMEmitter::Reg::r3);
+#else
+    blr(ARMEmitter::Reg::r3); // { CTX, Frame, RIP}
+#endif
+
+    if (config.StaticRegisterAllocation)
+      FillStaticRegs();
+
+    ldr(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+    subs(ARMEmitter::Size::i64Bit, ARMEmitter::XReg::x0, ARMEmitter::XReg::x0, 1);
+    str(ARMEmitter::XReg::x0, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalRefCount));
+
+    // Trigger segfault if any deferred signals are pending
+    ldr(TMP1, STATE, offsetof(FEXCore::Core::CPUState, DeferredSignalFaultAddress));
+    str(ARMEmitter::XReg::zr, TMP1, 0);
+
+    b(&LoopTop);
+  }
+
+  {
+    SignalHandlerReturnAddress = GetCursorAddress<uint64_t>();
+
+    // Now to get back to our old location we need to do a fault dance
+    // We can't use SIGTRAP here since gdb catches it and never gives it to the application!
+    hlt(0);
+  }
+
+  {
+    SignalHandlerReturnAddressRT = GetCursorAddress<uint64_t>();
+
+    // Now to get back to our old location we need to do a fault dance
+    // We can't use SIGTRAP here since gdb catches it and never gives it to the application!
+    hlt(0);
+  }
+
+  {
+    // Guest SIGILL handler
+    // Needs to be distinct from the SignalHandlerReturnAddress
+    GuestSignal_SIGILL = GetCursorAddress<uint64_t>();
+
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    hlt(0);
+  }
+
+  {
+    // Guest SIGTRAP handler
+    // Needs to be distinct from the SignalHandlerReturnAddress
+    GuestSignal_SIGTRAP = GetCursorAddress<uint64_t>();
+
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    brk(0);
+  }
+
+  {
+    // Guest Overflow handler
+    // Needs to be distinct from the SignalHandlerReturnAddress
+    GuestSignal_SIGSEGV = GetCursorAddress<uint64_t>();
+
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    // hlt/udf = SIGILL
+    // brk = SIGTRAP
+    // ??? = SIGSEGV
+    // Force a SIGSEGV by loading zero
+    if (CTX->ExitOnHLTEnabled()) {
+      ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, ReturningStackLocation));
+      add(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::rsp, ARMEmitter::Reg::r0, 0);
+      PopCalleeSavedRegisters();
+      ret();
+    }
+    else {
+      LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r1, 0);
+      ldr(ARMEmitter::XReg::x1, ARMEmitter::Reg::r1);
+    }
+  }
+
+  {
+    ThreadPauseHandlerAddressSpillSRA = GetCursorAddress<uint64_t>();
+    if (config.StaticRegisterAllocation)
+      SpillStaticRegs(TMP1);
+
+    ThreadPauseHandlerAddress = GetCursorAddress<uint64_t>();
+    // We are pausing, this means the frontend should be waiting for this thread to idle
+    // We will have faulted and jumped to this location at this point
+
+    // Call our sleep handler
+    ldr(ARMEmitter::XReg::x0, &l_CTX);
+    mov(ARMEmitter::XReg::x1, STATE);
+    ldr(ARMEmitter::XReg::x2, &l_Sleep);
+#ifdef VIXL_SIMULATOR
+    GenerateIndirectRuntimeCall<void, void *, void *>(ARMEmitter::Reg::r2);
+#else
+    blr(ARMEmitter::Reg::r2);
+#endif
+
+    PauseReturnInstruction = GetCursorAddress<uint64_t>();
+    // Fault to start running again
+    hlt(0);
+  }
+
+  {
+    // The expectation here is that a thunked function needs to call back in to the JIT in a reentrant safe way
+    // To do this safely we need to do some state tracking and register saving
+    //
+    // eg:
+    // JIT Call->
+    //  Thunk->
+    //    Thunk callback->
+    //
+    // The thunk callback needs to execute JIT code and when it returns, it needs to safely return to the thunk rather than JIT space
+    // This is handled by pushing a return address trampoline to the stack so when the guest address returns it hits our custom thunk return
+    //  - This will safely return us to the thunk
+    //
+    // On return to the thunk, the thunk can get whatever its return value is from the thread context depending on ABI handling on its end
+    // When the thunk itself returns, it'll do its regular return logic there
+    // void ReentrantCallback(FEXCore::Core::InternalThreadState *Thread, uint64_t RIP);
+    CallbackPtr = GetCursorAddress<JITCallback>();
+
+    // We expect the thunk to have previously pushed the registers it was using
+    PushCalleeSavedRegisters();
+
+    // First thing we need to move the thread state pointer back in to our register
+    mov(STATE, ARMEmitter::XReg::x0);
+
+    // Make sure to adjust the refcounter so we don't clear the cache now
+    ldr(ARMEmitter::WReg::w2, STATE_PTR(CpuStateFrame, SignalHandlerRefCounter));
+    add(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::r2, 1);
+    str(ARMEmitter::WReg::w2, STATE_PTR(CpuStateFrame, SignalHandlerRefCounter));
+
+    // Now push the callback return trampoline to the guest stack
+    // Guest will be misaligned because calling a thunk won't correct the guest's stack once we call the callback from the host
+    LoadConstant(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r0, CTX->X86CodeGen.CallbackReturn);
+
+    ldr(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, State.gregs[X86State::REG_RSP]));
+    sub(ARMEmitter::Size::i64Bit, ARMEmitter::Reg::r2, ARMEmitter::Reg::r2, 16);
+    str(ARMEmitter::XReg::x2, STATE_PTR(CpuStateFrame, State.gregs[X86State::REG_RSP]));
+
+    // Store the trampoline to the guest stack
+    // Guest stack is now correctly misaligned after a regular call instruction
+    str(ARMEmitter::XReg::x0, ARMEmitter::Reg::r2, 0);
+
+    // Store RIP to the context state
+    str(ARMEmitter::XReg::x1, STATE_PTR(CpuStateFrame, State.rip));
+
+    // load static regs
+    if (config.StaticRegisterAllocation)
+      FillStaticRegs();
+
+    // Now go back to the regular dispatcher loop
+    b(&LoopTop);
+  }
+
+  {
+    LUDIVHandlerAddress = GetCursorAddress<uint64_t>();
+
+    PushDynamicRegsAndLR(ARMEmitter::Reg::r3);
+    SpillStaticRegs(ARMEmitter::Reg::r3);
+
+    ldr(ARMEmitter::XReg::x3, STATE_PTR(CpuStateFrame, Pointers.AArch64.LUDIV));
+#ifdef VIXL_SIMULATOR
+    GenerateIndirectRuntimeCall<uint64_t, uint64_t, uint64_t, uint64_t>(ARMEmitter::Reg::r3);
+#else
+    blr(ARMEmitter::Reg::r3);
+#endif
+    FillStaticRegs();
+
+    // Result is now in x0
+    // Fix the stack and any values that were stepped on
+    PopDynamicRegsAndLR();
+
+    // Go back to our code block
+    ret();
+  }
+
(The LDIVHandlerAddress, LUREMHandlerAddress and LREMHandlerAddress blocks follow, emitted with the same sequence as LUDIV above but loading Pointers.AArch64.LDIV, .LUREM and .LREM respectively.)
+
+  Bind(&l_CTX);
+  dc64(reinterpret_cast<uintptr_t>(CTX));
+  Bind(&l_Sleep);
+  dc64(reinterpret_cast<uint64_t>(SleepThread));
+  Bind(&l_CompileBlock);
+  dc64(GetCompileBlockPtr());
+
+  Start = reinterpret_cast<uint64_t>(DispatchPtr);
+  End = GetCursorAddress<uint64_t>();
+  ClearICache(reinterpret_cast<void*>(DispatchPtr), End - reinterpret_cast<uint64_t>(DispatchPtr));
+
+  if (CTX->Config.BlockJITNaming()) {
+    fextl::string Name = fextl::fmt::format("Dispatch_{}", FHU::Syscalls::gettid());
+    CTX->Symbols.RegisterNamedRegion(reinterpret_cast<void*>(DispatchPtr), End - reinterpret_cast<uint64_t>(DispatchPtr), Name);
+  }
+  if (CTX->Config.GlobalJITNaming()) {
+    CTX->Symbols.RegisterJITSpace(reinterpret_cast<void*>(DispatchPtr), End - reinterpret_cast<uint64_t>(DispatchPtr));
+  }
+
+#ifdef VIXL_DISASSEMBLER
+  if (Disassemble() & FEXCore::Config::Disassemble::DISPATCHER) {
+    const auto DisasmEnd = GetCursorAddress<const vixl::aarch64::Instruction*>();
+    for (auto PCToDecode = DisasmBegin; PCToDecode < DisasmEnd; PCToDecode += 4) {
+      DisasmDecoder->Decode(PCToDecode);
+      auto Output = Disasm.GetOutput();
+      LogMan::Msg::IFmt("{}", Output);
+    }
+  }
+#endif
+}
+
+#ifdef VIXL_SIMULATOR
+void Dispatcher::ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame) {
+  Simulator.WriteXRegister(0, reinterpret_cast<int64_t>(Frame));
+  Simulator.RunFrom(reinterpret_cast<vixl::aarch64::Instruction const*>(DispatchPtr));
+}
+
+void Dispatcher::ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP) {
+  Simulator.WriteXRegister(0, reinterpret_cast<int64_t>(Frame));
+  Simulator.WriteXRegister(1, RIP);
+  Simulator.RunFrom(reinterpret_cast<vixl::aarch64::Instruction const*>(CallbackPtr));
+}
+#endif
+
+size_t Dispatcher::GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP) {
+  FEXCore::ARMEmitter::Emitter emit{CodeBuffer, MaxGDBPauseCheckSize};
+
+  ARMEmitter::ForwardLabel RunBlock;
+
+  // If we have a gdb server running then run in a less efficient mode that checks if we need to exit
+  // This happens when single stepping
+
+  static_assert(sizeof(FEXCore::Context::ContextImpl::Config.RunningMode) == 4, "This is expected to be size of 4");
+  emit.ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, Thread));
+  emit.ldr(ARMEmitter::XReg::x0, ARMEmitter::Reg::r0, offsetof(FEXCore::Core::InternalThreadState, CTX)); // Get Context
+  emit.ldr(ARMEmitter::WReg::w0, ARMEmitter::Reg::r0, offsetof(FEXCore::Context::ContextImpl, Config.RunningMode));
+
+  // If the value == 0 then we don't need to stop
+  emit.cbz(ARMEmitter::Size::i32Bit, ARMEmitter::Reg::r0, &RunBlock);
+  {
+    ARMEmitter::ForwardLabel l_GuestRIP;
+    // Make sure RIP is syncronized to the context
+    emit.ldr(ARMEmitter::XReg::x0, &l_GuestRIP);
+    emit.str(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, State.rip));
+
+    // Stop the thread
+    emit.ldr(ARMEmitter::XReg::x0, STATE_PTR(CpuStateFrame, Pointers.Common.ThreadPauseHandlerSpillSRA));
+    emit.br(ARMEmitter::Reg::r0);
+    emit.Bind(&l_GuestRIP);
+    emit.dc64(GuestRIP);
+  }
+  emit.Bind(&RunBlock);
+
+  auto UsedBytes = emit.GetCursorOffset();
+  emit.ClearICache(CodeBuffer, UsedBytes);
+  return UsedBytes;
+}
+
+void Dispatcher::InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) {
+  // Setup dispatcher specific pointers that need to be accessed from JIT code
+  {
+    auto &Common = Thread->CurrentFrame->Pointers.Common;
+
+    Common.DispatcherLoopTop = AbsoluteLoopTopAddress;
+    Common.DispatcherLoopTopFillSRA = AbsoluteLoopTopAddressFillSRA;
+    Common.ExitFunctionLinker = ExitFunctionLinkerAddress;
+    Common.ThreadStopHandlerSpillSRA = ThreadStopHandlerAddressSpillSRA;
+    Common.ThreadPauseHandlerSpillSRA = ThreadPauseHandlerAddressSpillSRA;
+    Common.GuestSignal_SIGILL = GuestSignal_SIGILL;
+    Common.GuestSignal_SIGTRAP = GuestSignal_SIGTRAP;
+    Common.GuestSignal_SIGSEGV = GuestSignal_SIGSEGV;
+    Common.SignalReturnHandler = SignalHandlerReturnAddress;
+    Common.SignalReturnHandlerRT = SignalHandlerReturnAddressRT;
+
+    auto &AArch64 = Thread->CurrentFrame->Pointers.AArch64;
+    AArch64.LUDIVHandler = LUDIVHandlerAddress;
+    AArch64.LDIVHandler = LDIVHandlerAddress;
+    AArch64.LUREMHandler = LUREMHandlerAddress;
+    AArch64.LREMHandler = LREMHandlerAddress;
+  }
+}
+
+fextl::unique_ptr<Dispatcher> Dispatcher::Create(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config) {
+  return fextl::make_unique<Dispatcher>(CTX, Config);
+}
+
 }
@@ -1,9 +1,15 @@
 // SPDX-License-Identifier: MIT
 #pragma once

+#include "Interface/Core/ArchHelpers/Arm64Emitter.h"
+
 #include <FEXCore/Core/CPUBackend.h>
 #include <FEXCore/fextl/memory.h>

+#ifdef VIXL_SIMULATOR
+#include <aarch64/simulator-aarch64.h>
+#endif
+
 #include <cstdint>
 #include <signal.h>
 #include <stddef.h>
@@ -29,9 +35,15 @@ struct DispatcherConfig {
   bool StaticRegisterAllocation = false;
 };

-class Dispatcher {
+#define STATE_PTR(STATE_TYPE, FIELD) \
+  STATE.R(), offsetof(FEXCore::Core::STATE_TYPE, FIELD)
+
+class Dispatcher final : public Arm64Emitter {
 public:
-  virtual ~Dispatcher() = default;
+  static fextl::unique_ptr<Dispatcher> Create(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config);
+
+  Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &Config);
+  ~Dispatcher() = default;

 /**
  * @name Dispatch Helper functions
@@ -57,46 +69,49 @@ public:
   uint64_t Start{};
   uint64_t End{};

-  virtual void InitThreadPointers(FEXCore::Core::InternalThreadState *Thread) = 0;
+  void InitThreadPointers(FEXCore::Core::InternalThreadState *Thread);

   // These are across all arches for now
   static constexpr size_t MaxGDBPauseCheckSize = 128;

-  virtual size_t GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP) = 0;
+  size_t GenerateGDBPauseCheck(uint8_t *CodeBuffer, uint64_t GuestRIP);

-  static fextl::unique_ptr<Dispatcher> CreateX86(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config);
-  static fextl::unique_ptr<Dispatcher> CreateArm64(FEXCore::Context::ContextImpl *CTX, const DispatcherConfig &Config);
-
-  virtual void ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame) {
+#ifdef VIXL_SIMULATOR
+  void ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame);
+  void ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP);
+#else
+  void ExecuteDispatch(FEXCore::Core::CpuStateFrame *Frame) {
     DispatchPtr(Frame);
   }

-  virtual void ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP) {
+  void ExecuteJITCallback(FEXCore::Core::CpuStateFrame *Frame, uint64_t RIP) {
     CallbackPtr(Frame, RIP);
   }
+#endif

-  virtual uint16_t GetSRAGPRCount() const {
-    return 0U;
+  uint16_t GetSRAGPRCount() const {
+    return StaticRegisters.size();
   }

-  virtual uint16_t GetSRAFPRCount() const {
-    return 0U;
+  uint16_t GetSRAFPRCount() const {
+    return StaticFPRegisters.size();
   }

-  virtual void GetSRAGPRMapping(uint8_t Mapping[16]) const {
+  void GetSRAGPRMapping(uint8_t Mapping[16]) const {
+    for (size_t i = 0; i < StaticRegisters.size(); ++i) {
+      Mapping[i] = StaticRegisters[i].Idx();
+    }
   }

-  virtual void GetSRAFPRMapping(uint8_t Mapping[16]) const {
+  void GetSRAFPRMapping(uint8_t Mapping[16]) const {
+    for (size_t i = 0; i < StaticFPRegisters.size(); ++i) {
+      Mapping[i] = StaticFPRegisters[i].Idx();
+    }
   }

   const DispatcherConfig& GetConfig() const { return config; }

 protected:
-  Dispatcher(FEXCore::Context::ContextImpl *ctx, const DispatcherConfig &Config)
-    : CTX {ctx}
-    , config {Config}
-  {}
-
   FEXCore::Context::ContextImpl *CTX;
   DispatcherConfig config;
@@ -109,6 +124,14 @@ protected:

   AsmDispatch DispatchPtr;
   JITCallback CallbackPtr;
+
+private:
+  // Long division helpers
+  uint64_t LUDIVHandlerAddress{};
+  uint64_t LDIVHandlerAddress{};
+  uint64_t LUREMHandlerAddress{};
+  uint64_t LREMHandlerAddress{};
+
+  void EmitDispatcher();
 };

 }
@@ -7,7 +7,7 @@ $end_info$

 #include "Interface/Context/Context.h"
 #include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
-#include "Interface/Core/Dispatcher/Arm64Dispatcher.h"
+#include "Interface/Core/Dispatcher/Dispatcher.h"
 #include "Interface/Core/JIT/Arm64/JITClass.h"

 namespace FEXCore::CPU {
@@ -16,7 +16,7 @@ $end_info$
 #include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
 #include "Interface/Core/LookupCache.h"

-#include "Interface/Core/Dispatcher/Arm64Dispatcher.h"
+#include "Interface/Core/Dispatcher/Dispatcher.h"
 #include "Interface/Core/JIT/Arm64/JITClass.h"
 #include "Interface/Core/InternalThreadState.h"

@@ -7,7 +7,7 @@ $end_info$

 #include "Interface/Core/ArchHelpers/CodeEmitter/Emitter.h"
 #include "Interface/Core/ArchHelpers/CodeEmitter/Registers.h"
-#include "Interface/Core/Dispatcher/Arm64Dispatcher.h"
+#include "Interface/Core/Dispatcher/Dispatcher.h"
 #include "Interface/Core/JIT/Arm64/JITClass.h"

 #include <FEXCore/Utils/MathUtils.h>
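To illustrate the resulting API, a hypothetical caller-side sketch (not part of this diff; CTX and Thread are assumed to be an initialized ContextImpl pointer and InternalThreadState pointer) of how a frontend creates and drives the merged dispatcher after this change:

  FEXCore::CPU::DispatcherConfig DispatcherConfig{};
  DispatcherConfig.StaticRegisterAllocation = true;  // assumption: the backend supports SRA
  // The factory now returns the single concrete class; CreateArm64()/CreateX86() are gone.
  fextl::unique_ptr<FEXCore::CPU::Dispatcher> Dispatcher = FEXCore::CPU::Dispatcher::Create(CTX, DispatcherConfig);
  // Publish the emitted handler addresses into the thread's CpuStateFrame pointers.
  Dispatcher->InitThreadPointers(Thread);
  // Enter the emitted dispatch loop for this thread.
  Dispatcher->ExecuteDispatch(Thread->CurrentFrame);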