core/memory: Pooled memory implementation (#1085)

Daniel R. 2024-09-29 09:28:41 +02:00 committed by GitHub
parent 5e98a3e1d8
commit 80bf46da4c
12 changed files with 301 additions and 11 deletions

View File

@@ -28,4 +28,16 @@ template <typename T>
return (value & 0x3FFF) == 0;
}
template <typename T>
requires std::is_integral_v<T>
[[nodiscard]] constexpr bool Is64KBAligned(T value) {
return (value & 0xFFFF) == 0;
}
template <typename T>
requires std::is_integral_v<T>
[[nodiscard]] constexpr bool Is2MBAligned(T value) {
return (value & 0x1FFFFF) == 0;
}
} // namespace Common
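
Aside (editor's sketch, not part of the commit): each helper is the usual power-of-two mask test, equivalent to value % N == 0 for boundary N. A minimal compile-time check, assuming only the helpers above:

static_assert(Common::Is64KBAligned(0x20000u));  // 128 KiB is a multiple of 64 KiB
static_assert(!Common::Is2MBAligned(0x10000u)); // 64 KiB is not 2 MiB-aligned
static_assert(Common::Is2MBAligned(0x400000u)); // 4 MiB is a multiple of 2 MiB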

View File

@@ -290,7 +290,8 @@ int PS4_SYSV_ABI sceKernelMkdir(const char* path, u16 mode) {
}
// CUSA02456: path = /aotl after sceSaveDataMount(mode = 1)
-if (dir_name.empty() || !std::filesystem::create_directory(dir_name)) {
+std::error_code ec;
+if (dir_name.empty() || !std::filesystem::create_directory(dir_name, ec)) {
return SCE_KERNEL_ERROR_EIO;
}
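
A note on the change above: the std::error_code overload of std::filesystem::create_directory reports failure through ec instead of throwing, so a bad save path now surfaces as SCE_KERNEL_ERROR_EIO rather than an uncaught exception. One subtlety, shown in this minimal sketch (dir is an illustrative variable): the overload also returns false when the directory already exists, leaving ec clear.

std::error_code ec;
if (!std::filesystem::create_directory(dir, ec)) {
    if (ec) {
        // Genuine failure: missing parent, permissions, I/O error, ...
    } else {
        // The directory already existed; ec stays clear in this case.
    }
}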

View File

@@ -56,7 +56,7 @@ void KernelSignalRequest() {
}
static void KernelServiceThread(std::stop_token stoken) {
-Common::SetCurrentThreadName("Kernel_ServiceThread");
+Common::SetCurrentThreadName("shadPS4:Kernel_ServiceThread");
while (!stoken.stop_requested()) {
HLE_TRACE;
@@ -186,6 +186,16 @@ void* PS4_SYSV_ABI posix_mmap(void* addr, u64 len, int prot, int flags, int fd,
return ptr;
}
s32 PS4_SYSV_ABI sceKernelConfiguredFlexibleMemorySize(u64* sizeOut) {
if (sizeOut == nullptr) {
return ORBIS_KERNEL_ERROR_EINVAL;
}
auto* memory = Core::Memory::Instance();
*sizeOut = memory->GetTotalFlexibleSize();
return ORBIS_OK;
}
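// Editor's sketch (not part of the commit): hypothetical guest-side use of the query.
//     u64 flexible_size = 0;
//     if (sceKernelConfiguredFlexibleMemorySize(&flexible_size) == ORBIS_OK) {
//         // flexible_size now holds the configured flexible memory budget.
//     }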
static uint64_t g_mspace_atomic_id_mask = 0;
static uint64_t g_mstate_table[64] = {0};
@@ -403,10 +413,12 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
// obj
LIB_OBJ("f7uOxY9mM1U", "libkernel", 1, "libkernel", 1, 1, &g_stack_chk_guard);
// misc
LIB_FUNCTION("JGfTMBOdUJo", "libkernel", 1, "libkernel", 1, 1, sceKernelGetFsSandboxRandomWord);
LIB_FUNCTION("XVL8So3QJUk", "libkernel", 1, "libkernel", 1, 1, posix_connect);
LIB_FUNCTION("6xVpy0Fdq+I", "libkernel", 1, "libkernel", 1, 1, _sigprocmask);
// memory
LIB_FUNCTION("OMDRKKAZ8I4", "libkernel", 1, "libkernel", 1, 1, sceKernelDebugRaiseException);
LIB_FUNCTION("rTXw65xmLIA", "libkernel", 1, "libkernel", 1, 1, sceKernelAllocateDirectMemory);
@@ -443,6 +455,14 @@ void LibKernel_Register(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("2SKEx6bSq-4", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap);
LIB_FUNCTION("kBJzF8x4SyE", "libkernel", 1, "libkernel", 1, 1, sceKernelBatchMap2);
LIB_FUNCTION("DGMG3JshrZU", "libkernel", 1, "libkernel", 1, 1, sceKernelSetVirtualRangeName);
LIB_FUNCTION("n1-v6FgU7MQ", "libkernel", 1, "libkernel", 1, 1,
sceKernelConfiguredFlexibleMemorySize);
// Memory pool
LIB_FUNCTION("qCSfqDILlns", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolExpand);
LIB_FUNCTION("pU-QydtGcGY", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolReserve);
LIB_FUNCTION("Vzl66WmfLvk", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolCommit);
LIB_FUNCTION("LXo1tpFqJGs", "libkernel", 1, "libkernel", 1, 1, sceKernelMemoryPoolDecommit);
// equeue
LIB_FUNCTION("D0OdFMjp46I", "libkernel", 1, "libkernel", 1, 1, sceKernelCreateEqueue);

View File

@@ -347,4 +347,102 @@ s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, cons
memory->NameVirtualRange(std::bit_cast<VAddr>(addr), len, name);
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_t len,
size_t alignment, u64* physAddrOut) {
// searchStart is unsigned, so only the range ordering needs validation.
if (searchEnd <= searchStart) {
LOG_ERROR(Kernel_Vmm, "Provided address range is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
const bool is_in_range = searchEnd - searchStart >= len;
if (len == 0 || !Common::Is64KBAligned(len) || !is_in_range) {
LOG_ERROR(Kernel_Vmm, "Provided length is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
if (alignment != 0 && !Common::Is64KBAligned(alignment)) {
LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
if (physAddrOut == nullptr) {
LOG_ERROR(Kernel_Vmm, "Result physical address pointer is null!");
return SCE_KERNEL_ERROR_EINVAL;
}
auto* memory = Core::Memory::Instance();
PAddr phys_addr = memory->PoolExpand(searchStart, searchEnd, len, alignment);
*physAddrOut = static_cast<u64>(phys_addr);
LOG_INFO(Kernel_Vmm,
"searchStart = {:#x}, searchEnd = {:#x}, len = {:#x}, alignment = {:#x}, physAddrOut "
"= {:#x}",
searchStart, searchEnd, len, alignment, phys_addr);
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t alignment, int flags,
void** addrOut) {
LOG_INFO(Kernel_Vmm, "addrIn = {}, len = {:#x}, alignment = {:#x}, flags = {:#x}",
fmt::ptr(addrIn), len, alignment, flags);
if (addrIn == nullptr) {
LOG_ERROR(Kernel_Vmm, "Address is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
if (len == 0 || !Common::Is2MBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 2MB aligned!");
return SCE_KERNEL_ERROR_EINVAL;
}
if (alignment != 0) {
if (!std::has_single_bit(alignment) && !Common::Is2MBAligned(alignment)) {
LOG_ERROR(Kernel_Vmm, "Alignment value is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
}
auto* memory = Core::Memory::Instance();
const VAddr in_addr = reinterpret_cast<VAddr>(addrIn);
const auto map_flags = static_cast<Core::MemoryMapFlags>(flags);
memory->PoolReserve(addrOut, in_addr, len, map_flags, alignment);
return ORBIS_OK;
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags) {
if (addr == nullptr) {
LOG_ERROR(Kernel_Vmm, "Address is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
if (len == 0 || !Common::Is64KBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 64KB aligned!");
return SCE_KERNEL_ERROR_EINVAL;
}
LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, type = {:#x}, prot = {:#x}, flags = {:#x}",
fmt::ptr(addr), len, type, prot, flags);
const VAddr in_addr = reinterpret_cast<VAddr>(addr);
const auto mem_prot = static_cast<Core::MemoryProt>(prot);
auto* memory = Core::Memory::Instance();
return memory->PoolCommit(in_addr, len, mem_prot);
}
s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags) {
if (addr == nullptr) {
LOG_ERROR(Kernel_Vmm, "Address is invalid!");
return SCE_KERNEL_ERROR_EINVAL;
}
if (len == 0 || !Common::Is64KBAligned(len)) {
LOG_ERROR(Kernel_Vmm, "Map size is either zero or not 64KB aligned!");
return SCE_KERNEL_ERROR_EINVAL;
}
LOG_INFO(Kernel_Vmm, "addr = {}, len = {:#x}, flags = {:#x}", fmt::ptr(addr), len, flags);
const VAddr pool_addr = reinterpret_cast<VAddr>(addr);
auto* memory = Core::Memory::Instance();
memory->PoolDecommit(pool_addr, len);
return ORBIS_OK;
}
} // namespace Libraries::Kernel
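
Taken together, the four entry points form an expand → reserve → commit → decommit lifecycle. A hypothetical guest-side sketch; the addresses, sizes, and the prot value below are illustrative assumptions, not values from the commit:

u64 phys = 0;
void* va = nullptr;
// Carve 16 MiB of pooled physical memory (the length must be 64 KiB-aligned).
sceKernelMemoryPoolExpand(0x0, 0x10000000, 0x1000000, 0, &phys);
// Reserve a virtual range (the length must be 2 MiB-aligned and addrIn non-null).
sceKernelMemoryPoolReserve(reinterpret_cast<void*>(0x900000000), 0x200000, 0, 0, &va);
// Back a 64 KiB-aligned slice of the reservation with pages (prot = 2 is assumed
// here to correspond to CPU read/write).
sceKernelMemoryPoolCommit(va, 0x10000, 0, 2, 0);
// Release the pages; the range drops back to its reserved state.
sceKernelMemoryPoolDecommit(va, 0x10000, 0);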

View File

@@ -114,4 +114,11 @@ s32 PS4_SYSV_ABI sceKernelBatchMap2(OrbisKernelBatchMapEntry* entries, int numEn
s32 PS4_SYSV_ABI sceKernelSetVirtualRangeName(const void* addr, size_t len, const char* name);
s32 PS4_SYSV_ABI sceKernelMemoryPoolExpand(u64 searchStart, u64 searchEnd, size_t len,
size_t alignment, u64* physAddrOut);
s32 PS4_SYSV_ABI sceKernelMemoryPoolReserve(void* addrIn, size_t len, size_t alignment, int flags,
void** addrOut);
s32 PS4_SYSV_ABI sceKernelMemoryPoolCommit(void* addr, size_t len, int type, int prot, int flags);
s32 PS4_SYSV_ABI sceKernelMemoryPoolDecommit(void* addr, size_t len, int flags);
} // namespace Libraries::Kernel

View File

@@ -414,6 +414,7 @@ ScePthreadMutex* createMutex(ScePthreadMutex* addr) {
if (addr == nullptr || *addr != nullptr) {
return addr;
}
const VAddr vaddr = reinterpret_cast<VAddr>(addr);
std::string name = fmt::format("mutex{:#x}", vaddr);
scePthreadMutexInit(addr, nullptr, name.c_str());
@@ -515,9 +516,12 @@ int PS4_SYSV_ABI scePthreadMutexattrSettype(ScePthreadMutexattr* attr, int type)
ptype = PTHREAD_MUTEX_RECURSIVE;
break;
case ORBIS_PTHREAD_MUTEX_NORMAL:
-case ORBIS_PTHREAD_MUTEX_ADAPTIVE:
ptype = PTHREAD_MUTEX_NORMAL;
break;
+case ORBIS_PTHREAD_MUTEX_ADAPTIVE:
+LOG_ERROR(Kernel_Pthread, "Unimplemented adaptive mutex");
+ptype = PTHREAD_MUTEX_ERRORCHECK;
+break;
default:
return SCE_KERNEL_ERROR_EINVAL;
}
@@ -1620,6 +1624,10 @@ void pthreadSymbolsRegister(Core::Loader::SymbolsResolver* sym) {
LIB_FUNCTION("upoVrzMHFeE", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexTrylock);
LIB_FUNCTION("IafI2PxcPnQ", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexTimedlock);
// scePthreadMutexInitForInternalLibc, scePthreadMutexattrInitForInternalLibc
LIB_FUNCTION("qH1gXoq71RY", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexInit);
LIB_FUNCTION("n2MMpvU8igI", "libkernel", 1, "libkernel", 1, 1, scePthreadMutexattrInit);
// cond calls
LIB_FUNCTION("2Tb92quprl0", "libkernel", 1, "libkernel", 1, 1, scePthreadCondInit);
LIB_FUNCTION("m5-2bsNfv7s", "libkernel", 1, "libkernel", 1, 1, scePthreadCondattrInit);

View File

@@ -79,7 +79,7 @@ static void backup(const std::filesystem::path& dir_name) {
}
static void BackupThreadBody() {
-Common::SetCurrentThreadName("SaveData_BackupThread");
+Common::SetCurrentThreadName("shadPS4:SaveData_BackupThread");
while (g_backup_status != WorkerStatus::Stopping) {
g_backup_status = WorkerStatus::Waiting;

View File

@@ -66,7 +66,7 @@ static void SaveFileSafe(void* buf, size_t count, const std::filesystem::path& p
}
[[noreturn]] void SaveThreadLoop() {
-Common::SetCurrentThreadName("SaveData_SaveDataMemoryThread");
+Common::SetCurrentThreadName("shadPS4:SaveData_SaveDataMemoryThread");
std::mutex mtx;
while (true) {
{

View File

@@ -260,7 +260,7 @@ void VideoOutDriver::PresentThread(std::stop_token token) {
static constexpr std::chrono::nanoseconds VblankPeriod{16666667};
const auto vblank_period = VblankPeriod / Config::vblankDiv();
-Common::SetCurrentThreadName("PresentThread");
+Common::SetCurrentThreadName("shadPS4:PresentThread");
Common::SetCurrentThreadRealtime(vblank_period);
Common::AccurateTimer timer{vblank_period};

View File

@@ -51,6 +51,35 @@ void MemoryManager::SetupMemoryRegions(u64 flexible_size) {
total_flexible_size, total_direct_size);
}
PAddr MemoryManager::PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment) {
std::scoped_lock lk{mutex};
auto dmem_area = FindDmemArea(search_start);
const auto is_suitable = [&] {
const auto aligned_base = alignment > 0 ? Common::AlignUp(dmem_area->second.base, alignment)
: dmem_area->second.base;
const auto alignment_size = aligned_base - dmem_area->second.base;
const auto remaining_size =
dmem_area->second.size >= alignment_size ? dmem_area->second.size - alignment_size : 0;
return dmem_area->second.is_free && remaining_size >= size;
};
while (!is_suitable() && dmem_area->second.GetEnd() <= search_end) {
dmem_area++;
}
ASSERT_MSG(is_suitable(), "Unable to find free direct memory area: size = {:#x}", size);
// Align free position
PAddr free_addr = dmem_area->second.base;
free_addr = alignment > 0 ? Common::AlignUp(free_addr, alignment) : free_addr;
// Register the carved region as an in-use, pooled allocation.
auto& area = CarveDmemArea(free_addr, size)->second;
area.is_free = false;
area.is_pooled = true;
return free_addr;
}
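// Editor's note (not from the commit): a worked example of the suitability math
// above, with illustrative numbers:
//     dmem_area: base = 0x30000, size = 0x50000, requested alignment = 0x40000
//     aligned_base   = AlignUp(0x30000, 0x40000) = 0x40000
//     alignment_size = 0x40000 - 0x30000         = 0x10000
//     remaining_size = 0x50000 - 0x10000         = 0x40000
// so the area satisfies any request with size <= 0x40000; anything larger
// advances the scan to the next dmem area.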
PAddr MemoryManager::Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type) {
std::scoped_lock lk{mutex};
@@ -112,6 +141,43 @@ void MemoryManager::Free(PAddr phys_addr, size_t size) {
MergeAdjacent(dmem_map, dmem_area);
}
int MemoryManager::PoolReserve(void** out_addr, VAddr virtual_addr, size_t size,
MemoryMapFlags flags, u64 alignment) {
std::scoped_lock lk{mutex};
virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
alignment = alignment > 0 ? alignment : 2_MB;
VAddr mapped_addr = alignment > 0 ? Common::AlignUp(virtual_addr, alignment) : virtual_addr;
// Fixed mapping means the virtual address must exactly match the provided one.
if (True(flags & MemoryMapFlags::Fixed)) {
// If the VMA is mapped, unmap the region first.
if (FindVMA(mapped_addr)->second.IsMapped()) {
UnmapMemoryImpl(mapped_addr, size);
}
// Re-query the VMA, as unmapping may have reshaped the VMA map.
const auto& vma = FindVMA(mapped_addr)->second;
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(vma.type == VMAType::Free && remaining_size >= size,
"Fixed pool reservation range is not free or is too small");
}
// Find the first free area starting with provided virtual address.
if (False(flags & MemoryMapFlags::Fixed)) {
mapped_addr = SearchFree(mapped_addr, size, alignment);
}
// Add virtual memory area
const auto new_vma_handle = CarveVMA(mapped_addr, size);
auto& new_vma = new_vma_handle->second;
new_vma.disallow_merge = True(flags & MemoryMapFlags::NoCoalesce);
new_vma.prot = MemoryProt::NoAccess;
new_vma.name = "";
new_vma.type = VMAType::PoolReserved;
MergeAdjacent(vma_map, new_vma_handle);
*out_addr = std::bit_cast<void*>(mapped_addr);
return ORBIS_OK;
}
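// Editor's sketch (not from the commit): the two placement modes, side by side.
// hint_va is illustrative, and the zero cast stands in for "no flags".
//     void* exact = nullptr;
//     // Fixed: must land exactly at hint_va; a live mapping there is unmapped first.
//     memory->PoolReserve(&exact, hint_va, 0x200000, Core::MemoryMapFlags::Fixed, 0);
//     void* anywhere = nullptr;
//     // Without Fixed: hint_va only seeds SearchFree(), which may place the range higher.
//     memory->PoolReserve(&anywhere, hint_va, 0x200000, static_cast<Core::MemoryMapFlags>(0), 0);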
int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags,
u64 alignment) {
std::scoped_lock lk{mutex};
@@ -149,6 +215,36 @@ int MemoryManager::Reserve(void** out_addr, VAddr virtual_addr, size_t size, Mem
return ORBIS_OK;
}
int MemoryManager::PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot) {
std::scoped_lock lk{mutex};
const u64 alignment = 64_KB;
// When virtual addr is zero, force it to virtual_base.
virtual_addr = (virtual_addr == 0) ? impl.SystemManagedVirtualBase() : virtual_addr;
VAddr mapped_addr = Common::AlignUp(virtual_addr, alignment);
// On failure this should return SCE_KERNEL_ERROR_ENOMEM, but that case shouldn't normally happen.
const auto& vma = FindVMA(mapped_addr)->second;
const size_t remaining_size = vma.base + vma.size - mapped_addr;
ASSERT_MSG(!vma.IsMapped() && remaining_size >= size,
"Pool commit range is already mapped or is too small");
// Perform the mapping.
void* out_addr = impl.Map(mapped_addr, size, alignment, -1, false);
TRACK_ALLOC(out_addr, size, "VMEM");
auto& new_vma = CarveVMA(mapped_addr, size)->second;
new_vma.disallow_merge = false;
new_vma.prot = prot;
new_vma.name = "";
new_vma.type = Core::VMAType::Pooled;
new_vma.is_exec = false;
new_vma.phys_base = 0;
return ORBIS_OK;
}
int MemoryManager::MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, VMAType type, std::string_view name,
bool is_exec, PAddr phys_addr, u64 alignment) {
@@ -232,6 +328,39 @@ int MemoryManager::MapFile(void** out_addr, VAddr virtual_addr, size_t size, Mem
return ORBIS_OK;
}
void MemoryManager::PoolDecommit(VAddr virtual_addr, size_t size) {
std::scoped_lock lk{mutex};
const auto it = FindVMA(virtual_addr);
const auto& vma_base = it->second;
ASSERT_MSG(vma_base.Contains(virtual_addr, size),
"Existing mapping does not contain requested unmap range");
const auto vma_base_addr = vma_base.base;
const auto vma_base_size = vma_base.size;
const auto phys_base = vma_base.phys_base;
const bool is_exec = vma_base.is_exec;
const auto start_in_vma = virtual_addr - vma_base_addr;
const auto type = vma_base.type;
rasterizer->UnmapMemory(virtual_addr, size);
// Return the range to the reserved pool state and attempt to coalesce it with neighbours.
const auto new_it = CarveVMA(virtual_addr, size);
auto& vma = new_it->second;
vma.type = VMAType::PoolReserved;
vma.prot = MemoryProt::NoAccess;
vma.phys_base = 0;
vma.disallow_merge = false;
vma.name = "";
MergeAdjacent(vma_map, new_it);
// Unmap the memory region.
impl.Unmap(vma_base_addr, vma_base_size, start_in_vma, start_in_vma + size, phys_base, is_exec,
false, false);
TRACK_FREE(virtual_addr, "VMEM");
}
void MemoryManager::UnmapMemory(VAddr virtual_addr, size_t size) {
std::scoped_lock lk{mutex};
UnmapMemoryImpl(virtual_addr, size);

View File

@@ -50,15 +50,17 @@ enum class VMAType : u32 {
Direct = 2,
Flexible = 3,
Pooled = 4,
-Stack = 5,
-Code = 6,
-File = 7,
+PoolReserved = 5,
+Stack = 6,
+Code = 7,
+File = 8,
};
struct DirectMemoryArea {
PAddr base = 0;
size_t size = 0;
int memory_type = 0;
bool is_pooled = false;
bool is_free = true;
PAddr GetEnd() const {
@@ -96,7 +98,7 @@ }
}
bool IsMapped() const noexcept {
-return type != VMAType::Free && type != VMAType::Reserved;
+return type != VMAType::Free && type != VMAType::Reserved && type != VMAType::PoolReserved;
}
bool CanMergeWith(const VirtualMemoryArea& next) const {
@@ -135,6 +137,10 @@ public:
return total_direct_size;
}
u64 GetTotalFlexibleSize() const {
return total_flexible_size;
}
u64 GetAvailableFlexibleSize() const {
return total_flexible_size - flexible_usage;
}
@@ -145,14 +151,21 @@ public:
void SetupMemoryRegions(u64 flexible_size);
PAddr PoolExpand(PAddr search_start, PAddr search_end, size_t size, u64 alignment);
PAddr Allocate(PAddr search_start, PAddr search_end, size_t size, u64 alignment,
int memory_type);
void Free(PAddr phys_addr, size_t size);
int PoolReserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags,
u64 alignment = 0);
int Reserve(void** out_addr, VAddr virtual_addr, size_t size, MemoryMapFlags flags,
u64 alignment = 0);
int PoolCommit(VAddr virtual_addr, size_t size, MemoryProt prot);
int MapMemory(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, VMAType type, std::string_view name = "",
bool is_exec = false, PAddr phys_addr = -1, u64 alignment = 0);
@@ -160,6 +173,8 @@ public:
int MapFile(void** out_addr, VAddr virtual_addr, size_t size, MemoryProt prot,
MemoryMapFlags flags, uintptr_t fd, size_t offset);
void PoolDecommit(VAddr virtual_addr, size_t size);
void UnmapMemory(VAddr virtual_addr, size_t size);
int QueryProtection(VAddr addr, void** start, void** end, u32* prot);

View File

@@ -44,7 +44,7 @@ Liverpool::~Liverpool() {
}
void Liverpool::Process(std::stop_token stoken) {
-Common::SetCurrentThreadName("GPU_CommandProcessor");
+Common::SetCurrentThreadName("shadPS4:GPU_CommandProcessor");
while (!stoken.stop_requested()) {
{