From fddcbfc5fa5783e8dd12bce5bdc9682f0d1f35f7 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Mon, 19 Sep 2022 12:01:34 -0700 Subject: [PATCH] GE Debugger: Include rendered CLUTs in frame dumps. Like with textures, this uses the VRAM address directly. --- GPU/Common/TextureCacheCommon.cpp | 8 +++- GPU/Debugger/Playback.cpp | 48 +++++++++++++++++---- GPU/Debugger/Record.cpp | 72 +++++++++++++++++++++---------- GPU/Debugger/RecordFormat.h | 1 + 4 files changed, 97 insertions(+), 32 deletions(-) diff --git a/GPU/Common/TextureCacheCommon.cpp b/GPU/Common/TextureCacheCommon.cpp index cba174d416..952776c076 100644 --- a/GPU/Common/TextureCacheCommon.cpp +++ b/GPU/Common/TextureCacheCommon.cpp @@ -37,6 +37,7 @@ #include "GPU/Common/ShaderId.h" #include "GPU/Common/GPUStateUtils.h" #include "GPU/Debugger/Debugger.h" +#include "GPU/Debugger/Record.h" #include "GPU/GPUCommon.h" #include "GPU/GPUInterface.h" #include "GPU/GPUState.h" @@ -1267,14 +1268,17 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) { // It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes. u32 bytes = Memory::ValidSize(clutAddr, loadBytes); - if (clutRenderAddress_ != 0xFFFFFFFF && PSP_CoreParameter().compat.flags().AllowDownloadCLUT) { + bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT; + if (GPURecord::IsActive()) + performDownload = true; + if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) { framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes); Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes); if (bytes < loadBytes) { memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes); } } else { - // Here we could check for clutRenderAddres_ != 0xFFFFFFFF and zero the CLUT or something, + // Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something, // but choosing not to for now. Though the results of loading the CLUT from RAM here is // almost certainly going to be bogus. #ifdef _M_SSE diff --git a/GPU/Debugger/Playback.cpp b/GPU/Debugger/Playback.cpp index aa34ce3ccf..c47eba66b3 100644 --- a/GPU/Debugger/Playback.cpp +++ b/GPU/Debugger/Playback.cpp @@ -298,6 +298,7 @@ private: void Registers(u32 ptr, u32 sz); void Vertices(u32 ptr, u32 sz); void Indices(u32 ptr, u32 sz); + void ClutAddr(u32 ptr, u32 sz); void Clut(u32 ptr, u32 sz); void TransferSrc(u32 ptr, u32 sz); void Memset(u32 ptr, u32 sz); @@ -308,6 +309,8 @@ private: void Display(u32 ptr, u32 sz); u32 execMemcpyDest = 0; + u32 execClutAddr = 0; + u32 execClutFlags = 0; u32 execListBuf = 0; u32 execListPos = 0; u32 execListID = 0; @@ -472,15 +475,40 @@ void DumpExecute::Indices(u32 ptr, u32 sz) { execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF)); } -void DumpExecute::Clut(u32 ptr, u32 sz) { - u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); - if (psp == 0) { - ERROR_LOG(SYSTEM, "Unable to allocate for clut"); - return; - } +void DumpExecute::ClutAddr(u32 ptr, u32 sz) { + struct ClutAddrData { + u32 addr; + u32 flags; + }; + const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr); + execClutAddr = data->addr; + execClutFlags = data->flags; +} - execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000)); - execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF)); +void DumpExecute::Clut(u32 ptr, u32 sz) { + // This is always run when we have the actual address set. + if (execClutAddr != 0) { + const bool isTarget = (execClutFlags & 1) != 0; + const bool unchangedVRAM = (execClutFlags & 2) != 0; + + // TODO: Could use drawnVRAM flag, but it can be wrong. + // Could potentially always skip if !isTarget, but playing it safe for offset texture behavior. + if (Memory::IsValidRange(execClutAddr, sz) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) { + // Intentionally don't trigger an upload here. + Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz); + } + + execClutAddr = 0; + } else { + u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this)); + if (psp == 0) { + ERROR_LOG(SYSTEM, "Unable to allocate for clut"); + return; + } + + execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000)); + execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF)); + } } void DumpExecute::TransferSrc(u32 ptr, u32 sz) { @@ -619,6 +647,10 @@ bool DumpExecute::Run() { Indices(cmd.ptr, cmd.sz); break; + case CommandType::CLUTADDR: + ClutAddr(cmd.ptr, cmd.sz); + break; + case CommandType::CLUT: Clut(cmd.ptr, cmd.sz); break; diff --git a/GPU/Debugger/Record.cpp b/GPU/Debugger/Record.cpp index d4a1838787..248ba1e48c 100644 --- a/GPU/Debugger/Record.cpp +++ b/GPU/Debugger/Record.cpp @@ -320,6 +320,34 @@ static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 alig return cmd; } +static u32 GetTargetFlags(u32 addr, u32 sizeInRAM) { + const bool isTarget = lastRenderTargets.find(addr) != lastRenderTargets.end(); + + bool isDirtyVRAM = false; + bool isDrawnVRAM = false; + uint32_t start = (addr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK; + for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) { + DirtyVRAMFlag flag = dirtyVRAM[start + i]; + isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN; + isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN; + + // Mark the VRAM clean now that it's been copied to VRAM. + if (flag == DirtyVRAMFlag::DIRTY) + dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN; + } + + // The isTarget flag is mostly used for replay of dumps on a PSP. + u32 flags = isTarget ? 1 : 0; + // The unchangedVRAM flag tells us we can skip recopying. + if (!isDirtyVRAM) + flags |= 2; + // And the drawn flag tells us this data was potentially drawn to. + if (isDrawnVRAM) + flags |= 4; + + return flags; +} + static void EmitTextureData(int level, u32 texaddr) { GETextureFormat format = gstate.getTextureFormat(); int w = gstate.getTextureWidth(level); @@ -327,7 +355,6 @@ static void EmitTextureData(int level, u32 texaddr) { int bufw = GetTextureBufw(level, texaddr, format); int extraw = w > bufw ? w - bufw : 0; u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8; - const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end(); CommandType type = CommandType((int)CommandType::TEXTURE0 + level); const u8 *p = Memory::GetPointerUnchecked(texaddr); @@ -342,27 +369,7 @@ static void EmitTextureData(int level, u32 texaddr) { u32 pad; }; - bool isDirtyVRAM = false; - bool isDrawnVRAM = false; - uint32_t start = (texaddr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK; - for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) { - DirtyVRAMFlag flag = dirtyVRAM[start + i]; - isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN; - isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN; - - // Mark the VRAM clean now that it's been copied to VRAM. - if (flag == DirtyVRAMFlag::DIRTY) - dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN; - } - - // The isTarget flag is mostly used for replay of dumps on a PSP. - u32 flags = isTarget ? 1 : 0; - // The unchangedVRAM flag tells us we can skip recopying. - if (!isDirtyVRAM) - flags |= 2; - // And the drawn flag tells us this data was potentially drawn to. - if (isDrawnVRAM) - flags |= 4; + u32 flags = GetTargetFlags(texaddr, sizeInRAM); FramebufData framebuf{ texaddr, bufw, flags }; framebufData.resize(sizeof(framebuf) + bytes); memcpy(&framebufData[0], &framebuf, sizeof(framebuf)); @@ -468,12 +475,33 @@ static void EmitTransfer(u32 op) { static void EmitClut(u32 op) { u32 addr = gstate.getClutAddress(); + + // Hardware rendering may be using a framebuffer as CLUT. + // To get at this, we first run the command (normally we're called right before it has run.) + if (Memory::IsVRAMAddress(addr)) + gpuDebug->SetCmdValue(op); + // Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727. u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F); u32 bytes = blocks * 32; bytes = Memory::ValidSize(addr, bytes); if (bytes != 0) { + // Send the original address so VRAM can be reasoned about. + if (Memory::IsVRAMAddress(addr)) { + struct ClutAddrData { + u32 addr; + u32 flags; + }; + u32 flags = GetTargetFlags(addr, bytes); + ClutAddrData data{ addr, flags }; + + FlushRegisters(); + Command cmd{CommandType::CLUTADDR, sizeof(data), (u32)pushbuf.size()}; + pushbuf.resize(pushbuf.size() + sizeof(data)); + memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data)); + commands.push_back(cmd); + } EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16); } diff --git a/GPU/Debugger/RecordFormat.h b/GPU/Debugger/RecordFormat.h index 8079136d0c..dc7abe3ec1 100644 --- a/GPU/Debugger/RecordFormat.h +++ b/GPU/Debugger/RecordFormat.h @@ -49,6 +49,7 @@ enum class CommandType : u8 { MEMCPYDEST = 7, MEMCPYDATA = 8, DISPLAY = 9, + CLUTADDR = 10, TEXTURE0 = 0x10, TEXTURE1 = 0x11,