mirror of
https://github.com/shadps4-emu/shadPS4.git
synced 2024-11-26 20:50:40 +00:00
macos: Remove need for TLS patch by storing TCB pointer in FS.
This commit is contained in:
parent
7551f061ad
commit
9e5047947b
@ -636,7 +636,7 @@ target_link_libraries(shadps4 PRIVATE Boost::headers GPUOpen::VulkanMemoryAlloca
|
||||
|
||||
if (APPLE)
|
||||
# Reserve system-managed memory space.
|
||||
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x400000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
|
||||
target_link_options(shadps4 PRIVATE -Wl,-no_pie,-no_fixup_chains,-no_huge,-pagezero_size,0x4000,-segaddr,TCB_SPACE,0x4000,-segaddr,GUEST_SYSTEM,0x400000,-image_base,0x20000000000)
|
||||
|
||||
# Link MoltenVK for Vulkan support
|
||||
find_library(MOLTENVK MoltenVK REQUIRED)
|
||||
|
@ -499,7 +499,6 @@ static bool FilterTcbAccess(const ZydisDecodedOperand* operands) {
|
||||
|
||||
static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGenerator& c) {
|
||||
const auto dst = ZydisToXbyakRegisterOperand(operands[0]);
|
||||
const auto slot = GetTcbKey();
|
||||
|
||||
#if defined(_WIN32)
|
||||
// The following logic is based on the Kernel32.dll asm of TlsGetValue
|
||||
@ -507,6 +506,8 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
||||
static constexpr u32 TlsExpansionSlotsOffset = 0x1780;
|
||||
static constexpr u32 TlsMinimumAvailable = 64;
|
||||
|
||||
const auto slot = GetTcbKey();
|
||||
|
||||
// Load the pointer to the table of TLS slots.
|
||||
c.putSeg(gs);
|
||||
if (slot < TlsMinimumAvailable) {
|
||||
@ -520,11 +521,6 @@ static void GenerateTcbAccess(const ZydisDecodedOperand* operands, Xbyak::CodeGe
|
||||
// Load the pointer to our buffer.
|
||||
c.mov(dst, qword[dst + tls_index * sizeof(LPVOID)]);
|
||||
}
|
||||
#elif defined(__APPLE__)
|
||||
// The following logic is based on the Darwin implementation of _os_tsd_get_direct, used by
|
||||
// pthread_getspecific https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L89-L96
|
||||
c.putSeg(gs);
|
||||
c.mov(dst, qword[reinterpret_cast<void*>(slot * sizeof(void*))]);
|
||||
#else
|
||||
const auto src = ZydisToXbyakMemoryOperand(operands[1]);
|
||||
|
||||
@ -548,10 +544,10 @@ struct PatchInfo {
|
||||
};
|
||||
|
||||
static const std::unordered_map<ZydisMnemonic, PatchInfo> Patches = {
|
||||
#if defined(_WIN32) || defined(__APPLE__)
|
||||
// Windows and Apple need a trampoline.
|
||||
#if defined(_WIN32)
|
||||
// Windows needs a trampoline.
|
||||
{ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, true}},
|
||||
#else
|
||||
#elif !defined(__APPLE__)
|
||||
{ZYDIS_MNEMONIC_MOV, {FilterTcbAccess, GenerateTcbAccess, false}},
|
||||
#endif
|
||||
|
||||
|
@ -18,6 +18,7 @@
|
||||
#include "core/libraries/kernel/threads/threads.h"
|
||||
#include "core/libraries/libs.h"
|
||||
#include "core/linker.h"
|
||||
#include "core/tls.h"
|
||||
#ifdef _WIN64
|
||||
#include <windows.h>
|
||||
#else
|
||||
@ -987,6 +988,7 @@ static void cleanup_thread(void* arg) {
|
||||
destructor(value);
|
||||
}
|
||||
}
|
||||
Core::SetTcbBase(nullptr);
|
||||
thread->is_almost_done = true;
|
||||
}
|
||||
|
||||
|
@ -106,6 +106,8 @@ void Linker::Execute() {
|
||||
RunMainEntry(m->GetEntryAddress(), &p, ProgramExitFunc);
|
||||
}
|
||||
}
|
||||
|
||||
SetTcbBase(nullptr);
|
||||
}
|
||||
|
||||
s32 Linker::LoadModule(const std::filesystem::path& elf_name, bool is_dynamic) {
|
||||
|
109
src/core/tls.cpp
109
src/core/tls.cpp
@ -9,7 +9,10 @@
|
||||
#ifdef _WIN32
|
||||
#include <windows.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <pthread.h>
|
||||
#include <architecture/i386/table.h>
|
||||
#include <boost/icl/interval_set.hpp>
|
||||
#include <i386/user_ldt.h>
|
||||
#include <sys/mman.h>
|
||||
#endif
|
||||
|
||||
namespace Core {
|
||||
@ -17,11 +20,17 @@ namespace Core {
|
||||
#ifdef _WIN32
|
||||
|
||||
static DWORD slot = 0;
|
||||
static std::once_flag slot_alloc_flag;
|
||||
|
||||
static void AllocTcbKey() {
|
||||
slot = TlsAlloc();
|
||||
}
|
||||
|
||||
u32 GetTcbKey() {
|
||||
std::call_once(slot_alloc_flag, &AllocTcbKey);
|
||||
return slot;
|
||||
}
|
||||
|
||||
void SetTcbBase(void* image_address) {
|
||||
const BOOL result = TlsSetValue(GetTcbKey(), image_address);
|
||||
ASSERT(result != 0);
|
||||
@ -33,27 +42,98 @@ Tcb* GetTcbBase() {
|
||||
|
||||
#elif defined(__APPLE__)
|
||||
|
||||
static pthread_key_t slot = 0;
|
||||
// Reserve space in the 32-bit address range for allocating TCB pages.
|
||||
asm(".zerofill TCB_SPACE,TCB_SPACE,__guest_system,0x3FC000");
|
||||
|
||||
static void AllocTcbKey() {
|
||||
ASSERT(pthread_key_create(&slot, nullptr) == 0);
|
||||
static constexpr u64 ldt_region_base = 0x4000;
|
||||
static constexpr u64 ldt_region_size = 0x3FC000;
|
||||
static constexpr u16 ldt_block_size = 0x1000;
|
||||
static constexpr u16 ldt_index_base = 8;
|
||||
static constexpr u16 ldt_index_total = (ldt_region_size - ldt_region_base) / ldt_block_size;
|
||||
|
||||
static boost::icl::interval_set<u16> free_ldts{};
|
||||
static std::mutex free_ldts_lock;
|
||||
static std::once_flag ldt_region_init_flag;
|
||||
|
||||
/// Reads the FS segment register of the calling thread and returns the descriptor index
/// encoded in that selector.
static u16 GetLdtIndex() {
    sel_t fs_selector;
    asm volatile("mov %%fs, %0" : "=r"(fs_selector));
    return fs_selector.index;
}
|
||||
|
||||
static void InitLdtRegion() {
|
||||
const void* result =
|
||||
mmap(reinterpret_cast<void*>(ldt_region_base), ldt_region_size, PROT_READ | PROT_WRITE,
|
||||
MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
|
||||
ASSERT_MSG(result != MAP_FAILED, "Failed to map memory region for LDT entries.");
|
||||
|
||||
free_ldts +=
|
||||
boost::icl::interval<u16>::right_open(ldt_index_base, ldt_index_base + ldt_index_total);
|
||||
}
|
||||
|
||||
static void** SetupThreadLdt() {
|
||||
std::call_once(ldt_region_init_flag, InitLdtRegion);
|
||||
|
||||
// Allocate a new LDT index for the current thread.
|
||||
u16 ldt_index;
|
||||
{
|
||||
std::unique_lock lock{free_ldts_lock};
|
||||
ASSERT_MSG(!free_ldts.empty(), "Out of LDT space.");
|
||||
ldt_index = first(*free_ldts.begin());
|
||||
free_ldts -= ldt_index;
|
||||
}
|
||||
const u64 addr = ldt_region_base + (ldt_index - ldt_index_base) * ldt_block_size;
|
||||
|
||||
// Create an LDT entry for the TCB.
|
||||
const ldt_entry ldt{.data{
|
||||
.base00 = static_cast<u16>(addr),
|
||||
.base16 = static_cast<u8>(addr >> 16),
|
||||
.base24 = static_cast<u8>(addr >> 24),
|
||||
.limit00 = static_cast<u16>(ldt_block_size - 1),
|
||||
.limit16 = 0,
|
||||
.type = DESC_DATA_WRITE,
|
||||
.dpl = 3, // User accessible
|
||||
.present = 1, // Segment present
|
||||
.stksz = DESC_DATA_32B,
|
||||
.granular = DESC_GRAN_BYTE,
|
||||
}};
|
||||
int ret = i386_set_ldt(ldt_index, &ldt, 1);
|
||||
ASSERT_MSG(ret == ldt_index,
|
||||
"Failed to set LDT for TLS area: expected {}, but syscall returned {}", ldt_index,
|
||||
ret);
|
||||
|
||||
// Set the FS segment to the created LDT.
|
||||
const sel_t sel{
|
||||
.rpl = USER_PRIV,
|
||||
.ti = SEL_LDT,
|
||||
.index = ldt_index,
|
||||
};
|
||||
asm volatile("mov %0, %%fs" ::"r"(sel));
|
||||
|
||||
return reinterpret_cast<void**>(addr);
|
||||
}
|
||||
|
||||
static void FreeThreadLdt() {
|
||||
std::unique_lock lock{free_ldts_lock};
|
||||
free_ldts += GetLdtIndex();
|
||||
}
|
||||
|
||||
void SetTcbBase(void* image_address) {
|
||||
ASSERT(pthread_setspecific(GetTcbKey(), image_address) == 0);
|
||||
if (image_address != nullptr) {
|
||||
*SetupThreadLdt() = image_address;
|
||||
} else {
|
||||
FreeThreadLdt();
|
||||
}
|
||||
}
|
||||
|
||||
Tcb* GetTcbBase() {
|
||||
return reinterpret_cast<Tcb*>(pthread_getspecific(GetTcbKey()));
|
||||
Tcb* tcb;
|
||||
asm volatile("mov %%fs:0x0, %0" : "=r"(tcb));
|
||||
return tcb;
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// Placeholder for code compatibility.
|
||||
static constexpr u32 slot = 0;
|
||||
|
||||
static void AllocTcbKey() {}
|
||||
|
||||
void SetTcbBase(void* image_address) {
|
||||
asm volatile("wrgsbase %0" ::"r"(image_address) : "memory");
|
||||
}
|
||||
@ -66,11 +146,4 @@ Tcb* GetTcbBase() {
|
||||
|
||||
#endif
|
||||
|
||||
static std::once_flag slot_alloc_flag;
|
||||
|
||||
u32 GetTcbKey() {
|
||||
std::call_once(slot_alloc_flag, &AllocTcbKey);
|
||||
return slot;
|
||||
}
|
||||
|
||||
} // namespace Core
|
||||
|
@ -22,8 +22,10 @@ struct Tcb {
|
||||
void* tcb_thread;
|
||||
};
|
||||
|
||||
#ifdef _WIN32
|
||||
/// Gets the thread local storage key for the TCB block.
|
||||
u32 GetTcbKey();
|
||||
#endif
|
||||
|
||||
/// Sets the data pointer to the TCB block.
|
||||
void SetTcbBase(void* image_address);
|
||||
|
Loading…
Reference in New Issue
Block a user