From 64f6012dbaeb0607976421adc8b7625e608bcd8b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 26 May 2014 14:18:06 -0700 Subject: [PATCH 1/4] Support memsets of framebuffers as uploads. Technically should clear stencil too... maybe it'd be better to handle separately. --- Core/HLE/ReplaceTables.cpp | 9 +++------ Core/HLE/sceKernelInterrupt.cpp | 3 +++ GPU/GLES/Framebuffer.cpp | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index e4f6d5bb9..d886f343a 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -168,12 +168,9 @@ static int Replace_memset() { u8 *dst = Memory::GetPointerUnchecked(destPtr); u8 value = PARAM(1); u32 bytes = PARAM(2); - bool skip = false; - if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(destPtr)) { - skip = gpu->UpdateMemory(destPtr, destPtr, bytes); - } - if (!skip) { - memset(dst, value, bytes); + memset(dst, value, bytes); + if (Memory::IsVRAMAddress(destPtr)) { + gpu->UpdateMemory(destPtr, destPtr, bytes); } RETURN(destPtr); #ifndef MOBILE_DEVICE diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 36e57208e..62b8ac266 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -553,6 +553,9 @@ u32 sceKernelMemset(u32 addr, u32 fillc, u32 n) u8 c = fillc & 0xff; DEBUG_LOG(SCEINTC, "sceKernelMemset(ptr = %08x, c = %02x, n = %08x)", addr, c, n); Memory::Memset(addr, c, n); + if (Memory::IsVRAMAddress(addr)) { + gpu->UpdateMemory(addr, addr, n); + } return addr; } diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index e1fc7265e..d22811963 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1880,9 +1880,11 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) { } } + bool actuallyMemset = src == dst; + // TODO: Do ReadFramebufferToMemory etc where applicable. 
// This will slow down MotoGP but make the hack above unnecessary. - if (dstBuffer && srcBuffer) { + if (dstBuffer && srcBuffer && !actuallyMemset) { if (srcBuffer == dstBuffer) { WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); } else { From b73c5754189948969b947abefd7411f1be389a41 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 26 May 2014 16:49:32 -0700 Subject: [PATCH 2/4] Support swizzled framebuffer downloads. Used in God Eater 2 when showing the load save screen. --- Core/HLE/ReplaceTables.cpp | 37 +++++++++++++++++++++++++++++++++++++ Core/MIPS/MIPSAnalyst.cpp | 26 ++++++++++++++++++++++++++ assets/knownfuncs.ini | 1 + 3 files changed, 64 insertions(+) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index d886f343a..df3039b39 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -142,6 +142,42 @@ static int Replace_memcpy16() { return 10 + bytes / 4; // approximation } +static int Replace_memcpy_swizzled() { + u32 destPtr = PARAM(0); + u32 srcPtr = PARAM(1); + u32 pitch = PARAM(2); + u32 h = PARAM(4); + if (Memory::IsVRAMAddress(srcPtr)) { + // Cheat a bit to force a download of the framebuffer. + // VRAM + 0x00400000 is simply a VRAM mirror. 
+ gpu->UpdateMemory(srcPtr ^ 0x00400000, srcPtr, pitch * h); + } + u8 *dstp = Memory::GetPointerUnchecked(destPtr); + const u8 *srcp = Memory::GetPointerUnchecked(srcPtr); + + const u8 *ysrcp = srcp; + for (u32 y = 0; y < h; y += 8) { + const u8 *xsrcp = ysrcp; + for (u32 x = 0; x < pitch; x += 16) { + const u8 *src = xsrcp; + for (int n = 0; n < 8; ++n) { + memcpy(dstp, src, 16); + src += pitch; + dstp += 16; + } + xsrcp += 16; + } + ysrcp += 8 * pitch; + } + + RETURN(0); +#ifndef MOBILE_DEVICE + CBreakPoints::ExecMemCheck(srcPtr, false, pitch * h, currentMIPS->pc); + CBreakPoints::ExecMemCheck(destPtr, true, pitch * h, currentMIPS->pc); +#endif + return 10 + (pitch * h) / 4; // approximation +} + static int Replace_memmove() { u32 destPtr = PARAM(0); u32 srcPtr = PARAM(1); @@ -420,6 +456,7 @@ static const ReplacementTableEntry entries[] = { { "ceilf", &Replace_ceilf, 0, 0}, { "memcpy", &Replace_memcpy, 0, 0}, { "memcpy16", &Replace_memcpy16, 0, 0}, + { "memcpy_swizzled", &Replace_memcpy_swizzled, 0, 0}, { "memmove", &Replace_memmove, 0, 0}, { "memset", &Replace_memset, 0, 0}, { "strlen", &Replace_strlen, 0, 0}, diff --git a/Core/MIPS/MIPSAnalyst.cpp b/Core/MIPS/MIPSAnalyst.cpp index 7b413ee78..942e22ca3 100644 --- a/Core/MIPS/MIPSAnalyst.cpp +++ b/Core/MIPS/MIPSAnalyst.cpp @@ -220,6 +220,7 @@ static const HardHashTableEntry hardcodedHashes[] = { { 0x6301fa5149bd973a, 120, "wcscat", }, { 0x658b07240a690dbd, 36, "strlen", }, { 0x66122f0ab50b2ef9, 296, "dl_write_dither_matrix_5", }, + { 0x66f7f1beccbc104a, 256, "memcpy_swizzled", }, // God Eater 2 { 0x679e647e34ecf7f1, 132, "roundf", }, { 0x67afe74d9ec72f52, 4380, "_strtod_r", }, { 0x68b22c2aa4b8b915, 400, "sqrt", }, @@ -738,6 +739,7 @@ skip: bool looking = false; bool end = false; bool isStraightLeaf = true; + bool decreasedSp = false; u32 addr; u32 addrNextSym = 0; @@ -761,6 +763,8 @@ skip: furthestBranch = 0; looking = false; end = false; + isStraightLeaf = false; + decreasedSp = false; continue; } @@ -771,6 
+775,7 @@ skip: if (target > furthestBranch) { furthestBranch = target; } + // j X } else if ((op & 0xFC000000) == 0x08000000) { u32 sureTarget = GetJumpTarget(addr); // Check for a tail call. Might not even have a jr ra. @@ -782,6 +787,18 @@ skip: end = true; } } else if (sureTarget != INVALIDTARGET && sureTarget > addr && sureTarget > furthestBranch) { + static const u32 MAX_JUMP_FORWARD = 128; + // If it's a nearby forward jump, and not a stackless leaf, assume not a tail call. + if (sureTarget <= addr + MAX_JUMP_FORWARD && decreasedSp) { + // But let's check the delay slot. + MIPSOpcode op = Memory::Read_Instruction(addr + 4); + // addiu sp, sp, +X + if ((op & 0xFFFF8000) != 0x27BD0000) { + furthestBranch = sureTarget; + continue; + } + } + // A jump later. Probably tail, but let's check if it jumps back. u32 knownEnd = furthestBranch == 0 ? addr : furthestBranch; u32 jumpback = ScanAheadForJumpback(sureTarget, currentFunction.start, knownEnd); @@ -806,6 +823,14 @@ skip: end = true; } } + // addiu sp, sp, -X + if ((op & 0xFFFF8000) == 0x27BD8000) { + decreasedSp = true; + } + // addiu sp, sp, +X + if ((op & 0xFFFF8000) == 0x27BD0000) { + decreasedSp = false; + } if (looking) { if (addr >= furthestBranch) { @@ -838,6 +863,7 @@ skip: looking = false; end = false; isStraightLeaf = true; + decreasedSp = false; currentFunction.start = addr+4; } } diff --git a/assets/knownfuncs.ini b/assets/knownfuncs.ini index 2ab8fa7fa..4dd2e379e 100644 --- a/assets/knownfuncs.ini +++ b/assets/knownfuncs.ini @@ -140,6 +140,7 @@ 6301fa5149bd973a:120 = wcscat 658b07240a690dbd:36 = strlen 66122f0ab50b2ef9:296 = dl_write_dither_matrix_5 +66f7f1beccbc104a:256 = memcpy_swizzled 679e647e34ecf7f1:132 = roundf 67afe74d9ec72f52:4380 = _strtod_r 68b22c2aa4b8b915:400 = sqrt From 8dcc09c9e1555819f64216b3ef31b86ea777eb81 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 27 May 2014 01:14:29 -0700 Subject: [PATCH 3/4] Use a separate func for gpu memset(). 
--- Core/HLE/ReplaceTables.cpp | 15 ++++++----- Core/HLE/sceDmac.cpp | 2 +- Core/HLE/sceKernelInterrupt.cpp | 9 ++++--- GPU/Directx9/GPU_DX9.cpp | 7 +++++- GPU/Directx9/GPU_DX9.h | 3 ++- GPU/GLES/Framebuffer.cpp | 8 +++--- GPU/GLES/Framebuffer.h | 2 +- GPU/GLES/GLES_GPU.cpp | 44 ++++++++++++++++++++++++++++----- GPU/GLES/GLES_GPU.h | 6 +++-- GPU/GPUInterface.h | 10 +++++++- GPU/Null/NullGpu.cpp | 8 +++++- GPU/Null/NullGpu.h | 3 ++- GPU/Software/SoftGpu.cpp | 11 ++++++++- GPU/Software/SoftGpu.h | 3 ++- 14 files changed, 100 insertions(+), 31 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index df3039b39..a6d460d83 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -106,7 +106,7 @@ static int Replace_memcpy() { u32 bytes = PARAM(2); bool skip = false; if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->UpdateMemory(destPtr, srcPtr, bytes); + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointerUnchecked(destPtr); @@ -127,7 +127,7 @@ static int Replace_memcpy16() { u32 bytes = PARAM(2) * 16; bool skip = false; if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->UpdateMemory(destPtr, srcPtr, bytes); + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointerUnchecked(destPtr); @@ -150,7 +150,7 @@ static int Replace_memcpy_swizzled() { if (Memory::IsVRAMAddress(srcPtr)) { // Cheat a bit to force a download of the framebuffer. // VRAM + 0x00400000 is simply a VRAM mirror. 
- gpu->UpdateMemory(srcPtr ^ 0x00400000, srcPtr, pitch * h); + gpu->PerformMemoryCopy(srcPtr ^ 0x00400000, srcPtr, pitch * h); } u8 *dstp = Memory::GetPointerUnchecked(destPtr); const u8 *srcp = Memory::GetPointerUnchecked(srcPtr); @@ -184,7 +184,7 @@ static int Replace_memmove() { u32 bytes = PARAM(2); bool skip = false; if (Memory::IsVRAMAddress(destPtr) || Memory::IsVRAMAddress(srcPtr)) { - skip = gpu->UpdateMemory(destPtr, srcPtr, bytes); + skip = gpu->PerformMemoryCopy(destPtr, srcPtr, bytes); } if (!skip && bytes != 0) { u8 *dst = Memory::GetPointerUnchecked(destPtr); @@ -204,9 +204,12 @@ static int Replace_memset() { u8 *dst = Memory::GetPointerUnchecked(destPtr); u8 value = PARAM(1); u32 bytes = PARAM(2); - memset(dst, value, bytes); + bool skip = false; if (Memory::IsVRAMAddress(destPtr)) { - gpu->UpdateMemory(destPtr, destPtr, bytes); + skip = gpu->PerformMemorySet(destPtr, value, bytes); + } + if (!skip) { + memset(dst, value, bytes); } RETURN(destPtr); #ifndef MOBILE_DEVICE diff --git a/Core/HLE/sceDmac.cpp b/Core/HLE/sceDmac.cpp index 4fd89dffb..9478c7574 100644 --- a/Core/HLE/sceDmac.cpp +++ b/Core/HLE/sceDmac.cpp @@ -50,7 +50,7 @@ int __DmacMemcpy(u32 dst, u32 src, u32 size) { bool skip = false; if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) { - skip = gpu->UpdateMemory(dst, src, size); + skip = gpu->PerformMemoryCopy(dst, src, size); } if (!skip) { Memory::Memcpy(dst, Memory::GetPointer(src), size); diff --git a/Core/HLE/sceKernelInterrupt.cpp b/Core/HLE/sceKernelInterrupt.cpp index 62b8ac266..c6bb3d4f7 100644 --- a/Core/HLE/sceKernelInterrupt.cpp +++ b/Core/HLE/sceKernelInterrupt.cpp @@ -552,9 +552,12 @@ u32 sceKernelMemset(u32 addr, u32 fillc, u32 n) { u8 c = fillc & 0xff; DEBUG_LOG(SCEINTC, "sceKernelMemset(ptr = %08x, c = %02x, n = %08x)", addr, c, n); - Memory::Memset(addr, c, n); + bool skip = false; if (Memory::IsVRAMAddress(addr)) { - gpu->UpdateMemory(addr, addr, n); + skip = gpu->PerformMemorySet(addr, fillc, n); + } + if 
(!skip) { + Memory::Memset(addr, c, n); } return addr; } @@ -565,7 +568,7 @@ u32 sceKernelMemcpy(u32 dst, u32 src, u32 size) bool skip = false; if (Memory::IsVRAMAddress(src) || Memory::IsVRAMAddress(dst)) { - skip = gpu->UpdateMemory(dst, src, size); + skip = gpu->PerformMemoryCopy(dst, src, size); } // Technically should crash if these are invalid and size > 0... diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 1749b11de..712071e13 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -1315,7 +1315,12 @@ void DIRECTX9_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationTy framebufferManager_.UpdateFromMemory(addr, size); } -bool DIRECTX9_GPU::UpdateMemory(u32 dest, u32 src, int size) { +bool DIRECTX9_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + return false; +} + +bool DIRECTX9_GPU::PerformMemorySet(u32 dest, u8 v, int size) { InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; } diff --git a/GPU/Directx9/GPU_DX9.h b/GPU/Directx9/GPU_DX9.h index 9f6cb8625..b7bd89390 100644 --- a/GPU/Directx9/GPU_DX9.h +++ b/GPU/Directx9/GPU_DX9.h @@ -46,7 +46,8 @@ public: virtual void BeginFrame(); virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type); - virtual bool UpdateMemory(u32 dest, u32 src, int size); + virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); + virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual void ClearCacheNextFrame(); virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders. diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index d22811963..b2df8d2f5 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -1,4 +1,4 @@ -// Copyright (c) 2012- PPSSPP Project. +// Copyright (c) 2012- PPSSPP Project. 
// This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by @@ -1851,7 +1851,7 @@ void FramebufferManager::UpdateFromMemory(u32 addr, int size, bool safe) { } } -bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) { +bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size, bool isMemset) { if (!useBufferedRendering_ || updateVRAM_) { return false; } @@ -1880,11 +1880,9 @@ bool FramebufferManager::NotifyFramebufferCopy(u32 src, u32 dst, int size) { } } - bool actuallyMemset = src == dst; - // TODO: Do ReadFramebufferToMemory etc where applicable. // This will slow down MotoGP but make the hack above unnecessary. - if (dstBuffer && srcBuffer && !actuallyMemset) { + if (dstBuffer && srcBuffer && !isMemset) { if (srcBuffer == dstBuffer) { WARN_LOG_REPORT_ONCE(dstsrccpy, G3D, "Intra-buffer memcpy (not supported) %08x -> %08x", src, dst); } else { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index 3eef9aef3..e65b4b680 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -211,7 +211,7 @@ public: } inline bool ShouldDownloadFramebuffer(const VirtualFramebuffer *vfb) const; - bool NotifyFramebufferCopy(u32 src, u32 dest, int size); + bool NotifyFramebufferCopy(u32 src, u32 dest, int size, bool isMemset = false); void DestroyFramebuf(VirtualFramebuffer *vfb); diff --git a/GPU/GLES/GLES_GPU.cpp b/GPU/GLES/GLES_GPU.cpp index dbda6f590..af620f5bd 100644 --- a/GPU/GLES/GLES_GPU.cpp +++ b/GPU/GLES/GLES_GPU.cpp @@ -662,7 +662,11 @@ void GLES_GPU::ProcessEvent(GPUEvent ev) { break; case GPU_EVENT_FB_MEMCPY: - UpdateMemoryInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size); + PerformMemoryCopyInternal(ev.fb_memcpy.dst, ev.fb_memcpy.src, ev.fb_memcpy.size); + break; + + case GPU_EVENT_FB_MEMSET: + PerformMemorySetInternal(ev.fb_memset.dst, ev.fb_memset.v, ev.fb_memset.size); break; default: @@ -1968,16 +1972,20 @@ void 
GLES_GPU::InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType t } } -void GLES_GPU::UpdateMemoryInternal(u32 dest, u32 src, int size) { +void GLES_GPU::PerformMemoryCopyInternal(u32 dest, u32 src, int size) { if (!framebufferManager_.NotifyFramebufferCopy(src, dest, size)) { Memory::Memcpy(dest, Memory::GetPointer(src), size); - InvalidateCache(dest, size, GPU_INVALIDATE_HINT); - } else { + } + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); +} + +void GLES_GPU::PerformMemorySetInternal(u32 dest, u8 v, int size) { + if (!framebufferManager_.NotifyFramebufferCopy(dest, dest, size, true)) { InvalidateCache(dest, size, GPU_INVALIDATE_HINT); } } -bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) { +bool GLES_GPU::PerformMemoryCopy(u32 dest, u32 src, int size) { // Track stray copies of a framebuffer in RAM. MotoGP does this. if (framebufferManager_.MayIntersectFramebuffer(src) || framebufferManager_.MayIntersectFramebuffer(dest)) { if (IsOnSeparateCPUThread()) { @@ -1990,7 +1998,7 @@ bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) { // This is a memcpy, so we need to wait for it to complete. SyncThread(); } else { - UpdateMemoryInternal(dest, src, size); + PerformMemoryCopyInternal(dest, src, size); } return true; } @@ -1999,6 +2007,30 @@ bool GLES_GPU::UpdateMemory(u32 dest, u32 src, int size) { return false; } +bool GLES_GPU::PerformMemorySet(u32 dest, u8 v, int size) { + // This may indicate a memset, usually to 0, of a framebuffer. + if (framebufferManager_.MayIntersectFramebuffer(dest)) { + Memory::Memset(dest, v, size); + + if (IsOnSeparateCPUThread()) { + GPUEvent ev(GPU_EVENT_FB_MEMSET); + ev.fb_memset.dst = dest; + ev.fb_memset.v = v; + ev.fb_memset.size = size; + ScheduleEvent(ev); + + // We don't need to wait for the framebuffer to be updated. + } else { + PerformMemorySetInternal(dest, v, size); + } + return true; + } + + // Or perhaps a texture, let's invalidate. 
+ InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + return false; +} + void GLES_GPU::ClearCacheNextFrame() { textureCache_.ClearNextFrame(); } diff --git a/GPU/GLES/GLES_GPU.h b/GPU/GLES/GLES_GPU.h index 872811c91..c2625a1c7 100644 --- a/GPU/GLES/GLES_GPU.h +++ b/GPU/GLES/GLES_GPU.h @@ -44,7 +44,8 @@ public: virtual void BeginFrame(); virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type); - virtual bool UpdateMemory(u32 dest, u32 src, int size); + virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); + virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual void ClearCacheNextFrame(); virtual void DeviceLost(); // Only happens on Android. Drop all textures and shaders. @@ -151,7 +152,8 @@ private: void InitClearInternal(); void BeginFrameInternal(); void CopyDisplayToOutputInternal(); - void UpdateMemoryInternal(u32 dest, u32 src, int size); + void PerformMemoryCopyInternal(u32 dest, u32 src, int size); + void PerformMemorySetInternal(u32 dest, u8 v, int size); void InvalidateCacheInternal(u32 addr, int size, GPUInvalidationType type); static CommandInfo cmdInfo_[256]; diff --git a/GPU/GPUInterface.h b/GPU/GPUInterface.h index 65f9d8a35..b7250dc68 100644 --- a/GPU/GPUInterface.h +++ b/GPU/GPUInterface.h @@ -164,6 +164,7 @@ enum GPUEventType { GPU_EVENT_FINISH_EVENT_LOOP, GPU_EVENT_SYNC_THREAD, GPU_EVENT_FB_MEMCPY, + GPU_EVENT_FB_MEMSET, }; struct GPUEvent { @@ -182,6 +183,12 @@ struct GPUEvent { u32 src; int size; } fb_memcpy; + // GPU_EVENT_FB_MEMSET + struct { + u32 dst; + u8 v; + int size; + } fb_memset; }; operator GPUEventType() const { @@ -234,7 +241,8 @@ public: // If size = -1, invalidate everything. virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type) = 0; // Update either RAM from VRAM, or VRAM from RAM... or even VRAM from VRAM. 
- virtual bool UpdateMemory(u32 dest, u32 src, int size) = 0; + virtual bool PerformMemoryCopy(u32 dest, u32 src, int size) = 0; + virtual bool PerformMemorySet(u32 dest, u8 v, int size) = 0; // Will cause the texture cache to be cleared at the start of the next frame. virtual void ClearCacheNextFrame() = 0; diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index 5b881e86e..c9731e1aa 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -657,7 +657,13 @@ void NullGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) { // Nothing to invalidate. } -bool NullGPU::UpdateMemory(u32 dest, u32 src, int size) { +bool NullGPU::PerformMemoryCopy(u32 dest, u32 src, int size) { + // Nothing to update. + InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + return false; +} + +bool NullGPU::PerformMemorySet(u32 dest, u8 v, int size) { // Nothing to update. InvalidateCache(dest, size, GPU_INVALIDATE_HINT); return false; diff --git a/GPU/Null/NullGpu.h b/GPU/Null/NullGpu.h index db236f002..c0d92a55f 100644 --- a/GPU/Null/NullGpu.h +++ b/GPU/Null/NullGpu.h @@ -34,7 +34,8 @@ public: virtual void CopyDisplayToOutput() {} virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type); - virtual bool UpdateMemory(u32 dest, u32 src, int size); + virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); + virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual void ClearCacheNextFrame() {}; virtual void DeviceLost() {} diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 994a9dc30..2e3f0e1f1 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -851,7 +851,16 @@ void SoftGPU::InvalidateCache(u32 addr, int size, GPUInvalidationType type) // Nothing to invalidate. } -bool SoftGPU::UpdateMemory(u32 dest, u32 src, int size) +bool SoftGPU::PerformMemoryCopy(u32 dest, u32 src, int size) +{ + // Nothing to update. 
+ InvalidateCache(dest, size, GPU_INVALIDATE_HINT); + // Let's just be safe. + framebufferDirty_ = true; + return false; +} + +bool SoftGPU::PerformMemorySet(u32 dest, u8 v, int size) { // Nothing to update. InvalidateCache(dest, size, GPU_INVALIDATE_HINT); diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 038d79e9b..6844c3161 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -59,7 +59,8 @@ public: virtual void CopyDisplayToOutput(); virtual void UpdateStats(); virtual void InvalidateCache(u32 addr, int size, GPUInvalidationType type); - virtual bool UpdateMemory(u32 dest, u32 src, int size); + virtual bool PerformMemoryCopy(u32 dest, u32 src, int size); + virtual bool PerformMemorySet(u32 dest, u8 v, int size); virtual void ClearCacheNextFrame() {}; virtual void DeviceLost() {} From 0de41cd028443f336247ab9c02a607b1ceff9d3b Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Tue, 27 May 2014 07:50:08 -0700 Subject: [PATCH 4/4] Don't savestate replacement funcs. This makes it so we can reorder them as we like, which is nicer, and also makes sure there won't be weird compatibility problems or etc. 
--- Core/HLE/ReplaceTables.cpp | 21 +++++++++++++++++++++ Core/HLE/ReplaceTables.h | 4 ++++ Core/SaveState.cpp | 9 ++++++--- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/Core/HLE/ReplaceTables.cpp b/Core/HLE/ReplaceTables.cpp index a6d460d83..0c0ab7ce5 100644 --- a/Core/HLE/ReplaceTables.cpp +++ b/Core/HLE/ReplaceTables.cpp @@ -560,6 +560,27 @@ void RestoreReplacedInstructions(u32 startAddr, u32 endAddr) { replacedInstructions.erase(start, end); } +std::map<u32, u32> SaveAndClearReplacements() { + std::map<u32, u32> saved; + for (auto it = replacedInstructions.begin(), end = replacedInstructions.end(); it != end; ++it) { + const u32 addr = it->first; + const u32 curInstr = Memory::Read_U32(addr); + if (MIPS_IS_REPLACEMENT(curInstr)) { + saved[addr] = curInstr; + Memory::Write_U32(it->second, addr); + } + } + return saved; +} + +void RestoreSavedReplacements(const std::map<u32, u32> &saved) { + for (auto it = saved.begin(), end = saved.end(); it != end; ++it) { + const u32 addr = it->first; + // Just put the replacements back. + Memory::Write_U32(it->second, addr); + } +} + bool GetReplacedOpAt(u32 address, u32 *op) { u32 instr = Memory::Read_Opcode_JIT(address).encoding; if (MIPS_IS_REPLACEMENT(instr)) { diff --git a/Core/HLE/ReplaceTables.h b/Core/HLE/ReplaceTables.h index 23c38f5e5..8c8f1d5ee 100644 --- a/Core/HLE/ReplaceTables.h +++ b/Core/HLE/ReplaceTables.h @@ -61,3 +61,7 @@ void WriteReplaceInstruction(u32 address, u64 hash, int size); void RestoreReplacedInstruction(u32 address); void RestoreReplacedInstructions(u32 startAddr, u32 endAddr); bool GetReplacedOpAt(u32 address, u32 *op); + +// For savestates. If you call SaveAndClearReplacements(), you must call RestoreSavedReplacements().
+std::map<u32, u32> SaveAndClearReplacements(); +void RestoreSavedReplacements(const std::map<u32, u32> &saved); diff --git a/Core/SaveState.cpp b/Core/SaveState.cpp index b45886de7..855979c02 100644 --- a/Core/SaveState.cpp +++ b/Core/SaveState.cpp @@ -33,6 +33,7 @@ #include "Core/FileSystems/MetaFileSystem.h" #include "Core/ELF/ParamSFO.h" #include "Core/HLE/HLE.h" +#include "Core/HLE/ReplaceTables.h" #include "Core/HLE/sceKernel.h" #include "Core/MemMap.h" #include "Core/MIPS/MIPS.h" @@ -226,10 +227,12 @@ namespace SaveState // Memory is a bit tricky when jit is enabled, since there's emuhacks in it. if (MIPSComp::jit && p.mode == p.MODE_WRITE) { - auto blocks = MIPSComp::jit->GetBlockCache(); - auto saved = blocks->SaveAndClearEmuHackOps(); + auto blockCache = MIPSComp::jit->GetBlockCache(); + auto savedReplacements = SaveAndClearReplacements(); + auto savedBlocks = blockCache->SaveAndClearEmuHackOps(); Memory::DoState(p); - blocks->RestoreSavedEmuHackOps(saved); + blockCache->RestoreSavedEmuHackOps(savedBlocks); + RestoreSavedReplacements(savedReplacements); } else Memory::DoState(p);