GE Debugger: Include rendered CLUTs in frame dumps.

Like with textures, this uses the VRAM address directly.
This commit is contained in:
Unknown W. Brackets 2022-09-19 12:01:34 -07:00
parent 3e0b3ba1a8
commit fddcbfc5fa
4 changed files with 97 additions and 32 deletions

View File

@ -37,6 +37,7 @@
#include "GPU/Common/ShaderId.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Debugger/Debugger.h"
#include "GPU/Debugger/Record.h"
#include "GPU/GPUCommon.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
@ -1267,14 +1268,17 @@ void TextureCacheCommon::LoadClut(u32 clutAddr, u32 loadBytes) {
// It's possible for a game to load CLUT outside valid memory without crashing, should result in zeroes.
u32 bytes = Memory::ValidSize(clutAddr, loadBytes);
if (clutRenderAddress_ != 0xFFFFFFFF && PSP_CoreParameter().compat.flags().AllowDownloadCLUT) {
bool performDownload = PSP_CoreParameter().compat.flags().AllowDownloadCLUT;
if (GPURecord::IsActive())
performDownload = true;
if (clutRenderAddress_ != 0xFFFFFFFF && performDownload) {
framebufferManager_->DownloadFramebufferForClut(clutRenderAddress_, clutRenderOffset_ + bytes);
Memory::MemcpyUnchecked(clutBufRaw_, clutAddr, bytes);
if (bytes < loadBytes) {
memset((u8 *)clutBufRaw_ + bytes, 0x00, loadBytes - bytes);
}
} else {
// Here we could check for clutRenderAddres_ != 0xFFFFFFFF and zero the CLUT or something,
// Here we could check for clutRenderAddress_ != 0xFFFFFFFF and zero the CLUT or something,
// but choosing not to for now. Though the results of loading the CLUT from RAM here is
// almost certainly going to be bogus.
#ifdef _M_SSE

View File

@ -298,6 +298,7 @@ private:
void Registers(u32 ptr, u32 sz);
void Vertices(u32 ptr, u32 sz);
void Indices(u32 ptr, u32 sz);
void ClutAddr(u32 ptr, u32 sz);
void Clut(u32 ptr, u32 sz);
void TransferSrc(u32 ptr, u32 sz);
void Memset(u32 ptr, u32 sz);
@ -308,6 +309,8 @@ private:
void Display(u32 ptr, u32 sz);
u32 execMemcpyDest = 0;
u32 execClutAddr = 0;
u32 execClutFlags = 0;
u32 execListBuf = 0;
u32 execListPos = 0;
u32 execListID = 0;
@ -472,15 +475,40 @@ void DumpExecute::Indices(u32 ptr, u32 sz) {
execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF));
}
void DumpExecute::Clut(u32 ptr, u32 sz) {
u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
if (psp == 0) {
ERROR_LOG(SYSTEM, "Unable to allocate for clut");
return;
}
void DumpExecute::ClutAddr(u32 ptr, u32 sz) {
struct ClutAddrData {
u32 addr;
u32 flags;
};
const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr);
execClutAddr = data->addr;
execClutFlags = data->flags;
}
execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
void DumpExecute::Clut(u32 ptr, u32 sz) {
// This is always run when we have the actual address set.
if (execClutAddr != 0) {
const bool isTarget = (execClutFlags & 1) != 0;
const bool unchangedVRAM = (execClutFlags & 2) != 0;
// TODO: Could use drawnVRAM flag, but it can be wrong.
// Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
if (Memory::IsValidRange(execClutAddr, sz) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) {
// Intentionally don't trigger an upload here.
Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz);
}
execClutAddr = 0;
} else {
u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
if (psp == 0) {
ERROR_LOG(SYSTEM, "Unable to allocate for clut");
return;
}
execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
}
}
void DumpExecute::TransferSrc(u32 ptr, u32 sz) {
@ -619,6 +647,10 @@ bool DumpExecute::Run() {
Indices(cmd.ptr, cmd.sz);
break;
case CommandType::CLUTADDR:
ClutAddr(cmd.ptr, cmd.sz);
break;
case CommandType::CLUT:
Clut(cmd.ptr, cmd.sz);
break;

View File

@ -320,6 +320,34 @@ static Command EmitCommandWithRAM(CommandType t, const void *p, u32 sz, u32 alig
return cmd;
}
static u32 GetTargetFlags(u32 addr, u32 sizeInRAM) {
const bool isTarget = lastRenderTargets.find(addr) != lastRenderTargets.end();
bool isDirtyVRAM = false;
bool isDrawnVRAM = false;
uint32_t start = (addr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
DirtyVRAMFlag flag = dirtyVRAM[start + i];
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
// Mark the VRAM clean now that it's been copied to VRAM.
if (flag == DirtyVRAMFlag::DIRTY)
dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
}
// The isTarget flag is mostly used for replay of dumps on a PSP.
u32 flags = isTarget ? 1 : 0;
// The unchangedVRAM flag tells us we can skip recopying.
if (!isDirtyVRAM)
flags |= 2;
// And the drawn flag tells us this data was potentially drawn to.
if (isDrawnVRAM)
flags |= 4;
return flags;
}
static void EmitTextureData(int level, u32 texaddr) {
GETextureFormat format = gstate.getTextureFormat();
int w = gstate.getTextureWidth(level);
@ -327,7 +355,6 @@ static void EmitTextureData(int level, u32 texaddr) {
int bufw = GetTextureBufw(level, texaddr, format);
int extraw = w > bufw ? w - bufw : 0;
u32 sizeInRAM = (textureBitsPerPixel[format] * (bufw * h + extraw)) / 8;
const bool isTarget = lastRenderTargets.find(texaddr) != lastRenderTargets.end();
CommandType type = CommandType((int)CommandType::TEXTURE0 + level);
const u8 *p = Memory::GetPointerUnchecked(texaddr);
@ -342,27 +369,7 @@ static void EmitTextureData(int level, u32 texaddr) {
u32 pad;
};
bool isDirtyVRAM = false;
bool isDrawnVRAM = false;
uint32_t start = (texaddr >> DIRTY_VRAM_SHIFT) & DIRTY_VRAM_MASK;
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
DirtyVRAMFlag flag = dirtyVRAM[start + i];
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
// Mark the VRAM clean now that it's been copied to VRAM.
if (flag == DirtyVRAMFlag::DIRTY)
dirtyVRAM[start + i] = DirtyVRAMFlag::CLEAN;
}
// The isTarget flag is mostly used for replay of dumps on a PSP.
u32 flags = isTarget ? 1 : 0;
// The unchangedVRAM flag tells us we can skip recopying.
if (!isDirtyVRAM)
flags |= 2;
// And the drawn flag tells us this data was potentially drawn to.
if (isDrawnVRAM)
flags |= 4;
u32 flags = GetTargetFlags(texaddr, sizeInRAM);
FramebufData framebuf{ texaddr, bufw, flags };
framebufData.resize(sizeof(framebuf) + bytes);
memcpy(&framebufData[0], &framebuf, sizeof(framebuf));
@ -468,12 +475,33 @@ static void EmitTransfer(u32 op) {
static void EmitClut(u32 op) {
u32 addr = gstate.getClutAddress();
// Hardware rendering may be using a framebuffer as CLUT.
// To get at this, we first run the command (normally we're called right before it has run.)
if (Memory::IsVRAMAddress(addr))
gpuDebug->SetCmdValue(op);
// Actually should only be 0x3F, but we allow enhanced CLUTs. See #15727.
u32 blocks = (op & 0x7F) == 0x40 ? 0x40 : (op & 0x3F);
u32 bytes = blocks * 32;
bytes = Memory::ValidSize(addr, bytes);
if (bytes != 0) {
// Send the original address so VRAM can be reasoned about.
if (Memory::IsVRAMAddress(addr)) {
struct ClutAddrData {
u32 addr;
u32 flags;
};
u32 flags = GetTargetFlags(addr, bytes);
ClutAddrData data{ addr, flags };
FlushRegisters();
Command cmd{CommandType::CLUTADDR, sizeof(data), (u32)pushbuf.size()};
pushbuf.resize(pushbuf.size() + sizeof(data));
memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data));
commands.push_back(cmd);
}
EmitCommandWithRAM(CommandType::CLUT, Memory::GetPointerUnchecked(addr), bytes, 16);
}

View File

@ -49,6 +49,7 @@ enum class CommandType : u8 {
MEMCPYDEST = 7,
MEMCPYDATA = 8,
DISPLAY = 9,
CLUTADDR = 10,
TEXTURE0 = 0x10,
TEXTURE1 = 0x11,