mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
Debugger: Track unchanged VRAM to avoid copy.
See #15251, the framedump here reused a VRAM texture for many draws, and this caused it to recopy the texture over and over again. Use a flag to skip if possible.
This commit is contained in:
parent
eb765a80f8
commit
539e2bbbf8
@ -564,8 +564,10 @@ void DumpExecute::Framebuf(int level, u32 ptr, u32 sz) {
|
||||
u32 headerSize = (u32)sizeof(FramebufData);
|
||||
u32 pspSize = sz - headerSize;
|
||||
const bool isTarget = (framebuf->flags & 1) != 0;
|
||||
const bool unchangedVRAM = (framebuf->flags & 2) != 0;
|
||||
// TODO: Could use drawnVRAM flag, but it can be wrong.
|
||||
// Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
|
||||
if (Memory::IsValidRange(framebuf->addr, pspSize) && (!isTarget || !g_Config.bSoftwareRendering)) {
|
||||
if (Memory::IsValidRange(framebuf->addr, pspSize) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) {
|
||||
// Intentionally don't trigger an upload here.
|
||||
Memory::MemcpyUnchecked(framebuf->addr, pushbuf_.data() + ptr + headerSize, pspSize);
|
||||
}
|
||||
|
@ -57,6 +57,17 @@ static std::vector<u32> lastRegisters;
|
||||
static std::vector<u32> lastTextures;
|
||||
static std::set<u32> lastRenderTargets;
|
||||
|
||||
enum class DirtyVRAMFlag : uint8_t {
|
||||
CLEAN = 0,
|
||||
DIRTY = 1,
|
||||
DRAWN = 2,
|
||||
};
|
||||
static constexpr uint32_t DIRTY_VRAM_SHIFT = 8;
|
||||
static constexpr uint32_t DIRTY_VRAM_ROUND = (1 << DIRTY_VRAM_SHIFT) - 1;
|
||||
static constexpr uint32_t DIRTY_VRAM_SIZE = (2 * 1024 * 1024) >> DIRTY_VRAM_SHIFT;
|
||||
static constexpr uint32_t DIRTY_VRAM_MASK = (2 * 1024 * 1024 - 1) >> DIRTY_VRAM_SHIFT;
|
||||
static DirtyVRAMFlag dirtyVRAM[DIRTY_VRAM_SIZE];
|
||||
|
||||
static void FlushRegisters() {
|
||||
if (!lastRegisters.empty()) {
|
||||
Command last{CommandType::REGISTERS};
|
||||
@ -90,6 +101,43 @@ static Path GenRecordingFilename() {
|
||||
return dumpDir / StringFromFormat("%s_%04d.ppdmp", prefix.c_str(), 9999);
|
||||
}
|
||||
|
||||
static void DirtyAllVRAM(DirtyVRAMFlag flag) {
|
||||
for (uint32_t i = 0; i < DIRTY_VRAM_SIZE; ++i)
|
||||
dirtyVRAM[i] = flag;
|
||||
}
|
||||
|
||||
static void DirtyVRAM(u32 start, u32 sz, DirtyVRAMFlag flag) {
|
||||
u32 count = (sz + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT;
|
||||
u32 first = (start >> 10) & DIRTY_VRAM_MASK;
|
||||
if (first + count > DIRTY_VRAM_SIZE) {
|
||||
DirtyAllVRAM(flag);
|
||||
return;
|
||||
}
|
||||
|
||||
for (u32 i = 0; i < count; ++i)
|
||||
dirtyVRAM[first + i] = flag;
|
||||
}
|
||||
|
||||
static void DirtyDrawnVRAM() {
|
||||
int w = std::max(gstate.getScissorX2(), gstate.getRegionX2()) + 1;
|
||||
int h = std::max(gstate.getScissorY2(), gstate.getRegionY2()) + 1;
|
||||
|
||||
bool drawZ = gstate.isDepthWriteEnabled() && gstate.isDepthTestEnabled();
|
||||
bool clearZ = gstate.isModeClear() && gstate.isClearModeDepthMask();
|
||||
if (drawZ || clearZ) {
|
||||
int bytes = 2 * gstate.DepthBufStride() * h;
|
||||
if (w > gstate.DepthBufStride())
|
||||
bytes += 2 * (w - gstate.DepthBufStride());
|
||||
DirtyVRAM(gstate.getDepthBufAddress(), bytes, DirtyVRAMFlag::DRAWN);
|
||||
}
|
||||
|
||||
int bpp = gstate.FrameBufFormat() == GE_FORMAT_8888 ? 4 : 2;
|
||||
int bytes = bpp * gstate.FrameBufStride() * h;
|
||||
if (w > gstate.FrameBufStride())
|
||||
bytes += bpp * (w - gstate.FrameBufStride());
|
||||
DirtyVRAM(gstate.getFrameBufAddress(), bytes, DirtyVRAMFlag::DRAWN);
|
||||
}
|
||||
|
||||
static void BeginRecording() {
|
||||
active = true;
|
||||
nextFrame = false;
|
||||
@ -103,6 +151,7 @@ static void BeginRecording() {
|
||||
gstate.Save((u32_le *)(pushbuf.data() + ptr));
|
||||
|
||||
commands.push_back({CommandType::INIT, sz, ptr});
|
||||
DirtyAllVRAM(DirtyVRAMFlag::DIRTY);
|
||||
}
|
||||
|
||||
static void WriteCompressed(FILE *fp, const void *p, size_t sz) {
|
||||
@ -278,8 +327,22 @@ static void EmitTextureData(int level, u32 texaddr) {
|
||||
u32 pad;
|
||||
};
|
||||
|
||||
bool isDirtyVRAM = false;
|
||||
bool isDrawnVRAM = false;
|
||||
for (uint32_t i = 0; i < (sizeInRAM + DIRTY_VRAM_ROUND) >> DIRTY_VRAM_SHIFT; ++i) {
|
||||
DirtyVRAMFlag flag = dirtyVRAM[(texaddr >> DIRTY_VRAM_SHIFT) + i];
|
||||
isDirtyVRAM = isDirtyVRAM || flag != DirtyVRAMFlag::CLEAN;
|
||||
isDrawnVRAM = isDrawnVRAM || flag == DirtyVRAMFlag::DRAWN;
|
||||
}
|
||||
|
||||
// The isTarget flag is mostly used for replay of dumps on a PSP.
|
||||
u32 flags = isTarget ? 1 : 0;
|
||||
// The unchangedVRAM flag tells us we can skip recopying.
|
||||
if (!isDirtyVRAM)
|
||||
flags |= 2;
|
||||
// And the drawn flag tells us this data was potentially drawn to.
|
||||
if (isDrawnVRAM)
|
||||
flags |= 4;
|
||||
FramebufData framebuf{ texaddr, bufw, flags };
|
||||
framebufData.resize(sizeof(framebuf) + bytes);
|
||||
memcpy(&framebufData[0], &framebuf, sizeof(framebuf));
|
||||
@ -352,7 +415,8 @@ static void EmitTransfer(u32 op) {
|
||||
FlushRegisters();
|
||||
|
||||
// This may not make a lot of sense right now, unless it's to a framebuf...
|
||||
if (!Memory::IsVRAMAddress(gstate.getTransferDstAddress())) {
|
||||
u32 dstBasePtr = gstate.getTransferDstAddress();
|
||||
if (!Memory::IsVRAMAddress(dstBasePtr)) {
|
||||
// Skip, not VRAM, so can't affect drawing (we flush textures each prim.)
|
||||
return;
|
||||
}
|
||||
@ -361,6 +425,9 @@ static void EmitTransfer(u32 op) {
|
||||
u32 srcStride = gstate.getTransferSrcStride();
|
||||
int srcX = gstate.getTransferSrcX();
|
||||
int srcY = gstate.getTransferSrcY();
|
||||
u32 dstStride = gstate.getTransferDstStride();
|
||||
int dstX = gstate.getTransferDstX();
|
||||
int dstY = gstate.getTransferDstY();
|
||||
int width = gstate.getTransferWidth();
|
||||
int height = gstate.getTransferHeight();
|
||||
int bpp = gstate.getTransferBpp();
|
||||
@ -368,8 +435,12 @@ static void EmitTransfer(u32 op) {
|
||||
u32 srcBytes = ((srcY + height - 1) * srcStride + (srcX + width)) * bpp;
|
||||
srcBytes = Memory::ValidSize(srcBasePtr, srcBytes);
|
||||
|
||||
u32 dstBytes = ((dstY + height - 1) * dstStride + (dstX + width)) * bpp;
|
||||
dstBytes = Memory::ValidSize(dstBasePtr, dstBytes);
|
||||
|
||||
if (srcBytes != 0) {
|
||||
EmitCommandWithRAM(CommandType::TRANSFERSRC, Memory::GetPointerUnchecked(srcBasePtr), srcBytes, 16);
|
||||
DirtyVRAM(dstBasePtr, dstBytes, DirtyVRAMFlag::DIRTY);
|
||||
}
|
||||
|
||||
lastRegisters.push_back(op);
|
||||
@ -391,6 +462,7 @@ static void EmitPrim(u32 op) {
|
||||
FlushPrimState(op & 0x0000FFFF);
|
||||
|
||||
lastRegisters.push_back(op);
|
||||
DirtyDrawnVRAM();
|
||||
}
|
||||
|
||||
static void EmitBezierSpline(u32 op) {
|
||||
@ -399,6 +471,7 @@ static void EmitBezierSpline(u32 op) {
|
||||
FlushPrimState(ucount * vcount);
|
||||
|
||||
lastRegisters.push_back(op);
|
||||
DirtyDrawnVRAM();
|
||||
}
|
||||
|
||||
bool IsActive() {
|
||||
@ -503,6 +576,7 @@ void NotifyMemcpy(u32 dest, u32 src, u32 sz) {
|
||||
sz = Memory::ValidSize(dest, sz);
|
||||
if (sz != 0) {
|
||||
EmitCommandWithRAM(CommandType::MEMCPYDATA, Memory::GetPointer(dest), sz, 1);
|
||||
DirtyVRAM(dest, sz, DirtyVRAMFlag::DIRTY);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -525,6 +599,7 @@ void NotifyMemset(u32 dest, int v, u32 sz) {
|
||||
Command cmd{CommandType::MEMSET, sizeof(data), (u32)pushbuf.size()};
|
||||
pushbuf.resize(pushbuf.size() + sizeof(data));
|
||||
memcpy(pushbuf.data() + cmd.ptr, &data, sizeof(data));
|
||||
DirtyVRAM(dest, sz, DirtyVRAMFlag::DIRTY);
|
||||
}
|
||||
}
|
||||
|
||||
@ -533,6 +608,8 @@ void NotifyUpload(u32 dest, u32 sz) {
|
||||
return;
|
||||
}
|
||||
NotifyMemcpy(dest, dest, sz);
|
||||
if (Memory::IsVRAMAddress(dest))
|
||||
DirtyVRAM(dest, sz, DirtyVRAMFlag::DIRTY);
|
||||
}
|
||||
|
||||
static bool HasDrawCommands() {
|
||||
@ -618,4 +695,8 @@ void NotifyFrame() {
|
||||
}
|
||||
}
|
||||
|
||||
void NotifyCPU() {
|
||||
DirtyAllVRAM(DirtyVRAMFlag::DIRTY);
|
||||
}
|
||||
|
||||
};
|
||||
|
@ -37,5 +37,6 @@ void NotifyMemset(u32 dest, int v, u32 sz);
|
||||
void NotifyUpload(u32 dest, u32 sz);
|
||||
void NotifyDisplay(u32 addr, int stride, int fmt);
|
||||
void NotifyFrame();
|
||||
void NotifyCPU();
|
||||
|
||||
};
|
||||
|
@ -1021,6 +1021,8 @@ bool GPUCommon::InterpretList(DisplayList &list) {
|
||||
}
|
||||
|
||||
FinishDeferred();
|
||||
if (debugRecording_)
|
||||
GPURecord::NotifyCPU();
|
||||
|
||||
// We haven't run the op at list.pc, so it shouldn't count.
|
||||
if (cycleLastPC != list.pc) {
|
||||
|
Loading…
Reference in New Issue
Block a user