From ab6bc4cb165bc3707582d892ab8e798425a8b00a Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sat, 28 Jan 2017 11:39:34 +0100 Subject: [PATCH] Restores a lost optimization from uv prescale - avoiding flushing. Should fix #9219. Also #ifdef away some invalid-address checks in hot paths in the GPU. --- GPU/Directx9/GPU_DX9.cpp | 34 +++++++++---------------------- GPU/GLES/GPU_GLES.cpp | 43 ++++++++++++--------------------------- GPU/GLES/GPU_GLES.h | 5 ----- GPU/GPUCommon.cpp | 30 +++++++++++++++++++++++---- GPU/GPUCommon.h | 21 +++++++++++++++++++ GPU/Vulkan/GPU_Vulkan.cpp | 34 +++++++++---------------------- GPU/Vulkan/GPU_Vulkan.h | 4 ---- 7 files changed, 80 insertions(+), 91 deletions(-) diff --git a/GPU/Directx9/GPU_DX9.cpp b/GPU/Directx9/GPU_DX9.cpp index 27f299e39..002c222a6 100644 --- a/GPU/Directx9/GPU_DX9.cpp +++ b/GPU/Directx9/GPU_DX9.cpp @@ -86,10 +86,10 @@ static const CommandTableEntry commandTable[] = { // Changes that dirty texture scaling. { GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_UVSCALEOFFSET }, - { GE_CMD_TEXSCALEU, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_DX9::Execute_TexScaleU }, - { GE_CMD_TEXSCALEV, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_DX9::Execute_TexScaleV }, - { GE_CMD_TEXOFFSETU, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_DX9::Execute_TexOffsetU }, - { GE_CMD_TEXOFFSETV, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_DX9::Execute_TexOffsetV }, + { GE_CMD_TEXSCALEU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleU }, + { GE_CMD_TEXSCALEV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleV }, + { GE_CMD_TEXOFFSETU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetU }, + { GE_CMD_TEXOFFSETV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetV }, // Changes that dirty the current texture. { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_DX9::Execute_TexSize0 }, @@ -741,6 +741,8 @@ void GPU_DX9::Execute_VertexTypeSkinning(u32 op, u32 diff) { } void GPU_DX9::Execute_Prim(u32 op, u32 diff) { + SetDrawType(DRAW_PRIM); + // This drives all drawing. All other state we just buffer up, then we apply it only // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. @@ -810,6 +812,8 @@ void GPU_DX9::Execute_Prim(u32 op, u32 diff) { } void GPU_DX9::Execute_Bezier(u32 op, u32 diff) { + SetDrawType(DRAW_BEZIER); + // This also make skipping drawing very effective. framebufferManagerDX9_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -853,6 +857,8 @@ void GPU_DX9::Execute_Bezier(u32 op, u32 diff) { } void GPU_DX9::Execute_Spline(u32 op, u32 diff) { + SetDrawType(DRAW_SPLINE); + // This also make skipping drawing very effective. framebufferManagerDX9_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -898,26 +904,6 @@ void GPU_DX9::Execute_Spline(u32 op, u32 diff) { AdvanceVerts(gstate.vertType, count, bytesRead); } -void GPU_DX9::Execute_TexScaleU(u32 op, u32 diff) { - gstate_c.uv.uScale = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_DX9::Execute_TexScaleV(u32 op, u32 diff) { - gstate_c.uv.vScale = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_DX9::Execute_TexOffsetU(u32 op, u32 diff) { - gstate_c.uv.uOff = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_DX9::Execute_TexOffsetV(u32 op, u32 diff) { - gstate_c.uv.vOff = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - void GPU_DX9::Execute_TexSize0(u32 op, u32 diff) { // Render to texture may have overridden the width/height. // Don't reset it unless the size is different / the texture has changed. diff --git a/GPU/GLES/GPU_GLES.cpp b/GPU/GLES/GPU_GLES.cpp index a3eaab44b..6620fe864 100644 --- a/GPU/GLES/GPU_GLES.cpp +++ b/GPU/GLES/GPU_GLES.cpp @@ -87,15 +87,14 @@ static const CommandTableEntry commandTable[] = { {GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE}, {GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE}, - // Changes that dirty texture scaling. - {GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_UVSCALEOFFSET}, - {GE_CMD_TEXSCALEU, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_GLES::Execute_TexScaleU}, - {GE_CMD_TEXSCALEV, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_GLES::Execute_TexScaleV}, - {GE_CMD_TEXOFFSETU, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_GLES::Execute_TexOffsetU}, - {GE_CMD_TEXOFFSETV, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_GLES::Execute_TexOffsetV}, + {GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, 0}, + {GE_CMD_TEXSCALEU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleU}, + {GE_CMD_TEXSCALEV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleV}, + {GE_CMD_TEXOFFSETU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetU}, + {GE_CMD_TEXOFFSETV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetV}, // Changes that dirty the current texture. - {GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_GLES::Execute_TexSize0}, + {GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, DIRTY_UVSCALEOFFSET, &GPU_GLES::Execute_TexSize0}, {GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS}, {GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS}, {GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS}, @@ -910,6 +909,8 @@ void GPU_GLES::Execute_Iaddr(u32 op, u32 diff) { } void GPU_GLES::Execute_Prim(u32 op, u32 diff) { + SetDrawType(DRAW_PRIM); + // This drives all drawing. All other state we just buffer up, then we apply it only // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. @@ -1000,6 +1001,8 @@ void GPU_GLES::Execute_VertexTypeSkinning(u32 op, u32 diff) { } void GPU_GLES::Execute_Bezier(u32 op, u32 diff) { + SetDrawType(DRAW_BEZIER); + // This also make skipping drawing very effective. framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -1054,6 +1057,8 @@ void GPU_GLES::Execute_Bezier(u32 op, u32 diff) { } void GPU_GLES::Execute_Spline(u32 op, u32 diff) { + SetDrawType(DRAW_SPLINE); + // This also make skipping drawing very effective. framebufferManagerGL_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -1122,35 +1127,13 @@ void GPU_GLES::Execute_Spline(u32 op, u32 diff) { AdvanceVerts(gstate.vertType, count, bytesRead); } -void GPU_GLES::Execute_TexScaleU(u32 op, u32 diff) { - gstate_c.uv.uScale = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_GLES::Execute_TexScaleV(u32 op, u32 diff) { - gstate_c.uv.vScale = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_GLES::Execute_TexOffsetU(u32 op, u32 diff) { - gstate_c.uv.uOff = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_GLES::Execute_TexOffsetV(u32 op, u32 diff) { - gstate_c.uv.vOff = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - void GPU_GLES::Execute_TexSize0(u32 op, u32 diff) { // Render to texture may have overridden the width/height. // Don't reset it unless the size is different / the texture has changed. if (diff || gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS)) { gstate_c.curTextureWidth = gstate.getTextureWidth(0); gstate_c.curTextureHeight = gstate.getTextureHeight(0); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); - // We will need to reset the texture now. - gstate_c.Dirty(DIRTY_TEXTURE_PARAMS); + gstate_c.Dirty(DIRTY_UVSCALEOFFSET | DIRTY_TEXTURE_PARAMS); } } diff --git a/GPU/GLES/GPU_GLES.h b/GPU/GLES/GPU_GLES.h index e6e9d4cd5..5efa75ea3 100644 --- a/GPU/GLES/GPU_GLES.h +++ b/GPU/GLES/GPU_GLES.h @@ -93,11 +93,6 @@ public: void Execute_LoadClut(u32 op, u32 diff); void Execute_VertexType(u32 op, u32 diff); void Execute_VertexTypeSkinning(u32 op, u32 diff); - - void Execute_TexScaleU(u32 op, u32 diff); - void Execute_TexScaleV(u32 op, u32 diff); - void Execute_TexOffsetU(u32 op, u32 diff); - void Execute_TexOffsetV(u32 op, u32 diff); void Execute_TexSize0(u32 op, u32 diff); // Using string because it's generic - makes no assumptions on the size of the shader IDs of this backend. diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index 54e688670..1c2da2b74 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -844,12 +844,14 @@ void GPUCommon::Execute_Origin(u32 op, u32 diff) { void GPUCommon::Execute_Jump(u32 op, u32 diff) { easy_guard guard(listLock); const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC); - if (Memory::IsValidAddress(target)) { - UpdatePC(currentList->pc, target - 4); - currentList->pc = target - 4; // pc will be increased after we return, counteract that - } else { +#ifdef _DEBUG + if (!Memory::IsValidAddress(target)) { ERROR_LOG_REPORT(G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF); + return; } +#endif + UpdatePC(currentList->pc, target - 4); + currentList->pc = target - 4; // pc will be increased after we return, counteract that } void GPUCommon::Execute_BJump(u32 op, u32 diff) { @@ -872,10 +874,12 @@ void GPUCommon::Execute_Call(u32 op, u32 diff) { // Saint Seiya needs correct support for relative calls. const u32 retval = currentList->pc + 4; const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC); +#ifdef _DEBUG if (!Memory::IsValidAddress(target)) { ERROR_LOG_REPORT(G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF); return; } +#endif // Bone matrix optimization - many games will CALL a bone matrix (!). if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) { @@ -911,10 +915,12 @@ void GPUCommon::Execute_Ret(u32 op, u32 diff) { const u32 target = stackEntry.pc & 0x0FFFFFFF; UpdatePC(currentList->pc, target - 4); currentList->pc = target - 4; +#ifdef _DEBUG if (!Memory::IsValidAddress(currentList->pc)) { ERROR_LOG_REPORT(G3D, "Invalid DL PC %08x on return", currentList->pc); UpdateState(GPUSTATE_ERROR); } +#endif } } @@ -1075,6 +1081,22 @@ void GPUCommon::Execute_End(u32 op, u32 diff) { } } +void GPUCommon::Execute_TexScaleU(u32 op, u32 diff) { + gstate_c.uv.uScale = getFloat24(op); +} + +void GPUCommon::Execute_TexScaleV(u32 op, u32 diff) { + gstate_c.uv.vScale = getFloat24(op); +} + +void GPUCommon::Execute_TexOffsetU(u32 op, u32 diff) { + gstate_c.uv.uOff = getFloat24(op); +} + +void GPUCommon::Execute_TexOffsetV(u32 op, u32 diff) { + gstate_c.uv.vOff = getFloat24(op); +} + void GPUCommon::Execute_Bezier(u32 op, u32 diff) { // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); diff --git a/GPU/GPUCommon.h b/GPU/GPUCommon.h index 851b9a38d..e6ac92ecc 100644 --- a/GPU/GPUCommon.h +++ b/GPU/GPUCommon.h @@ -4,6 +4,7 @@ #include "Common/MemoryUtil.h" #include "Core/ThreadEventQueue.h" #include "GPU/GPUInterface.h" +#include "GPU/GPUState.h" #include "GPU/Common/GPUDebugInterface.h" #if defined(__ANDROID__) @@ -18,6 +19,13 @@ class FramebufferManagerCommon; class TextureCacheCommon; class DrawEngineCommon; +enum DrawType { + DRAW_UNKNOWN, + DRAW_PRIM, + DRAW_SPLINE, + DRAW_BEZIER, +}; + class GPUCommon : public GPUThreadEventQueue, public GPUDebugInterface { public: GPUCommon(); @@ -86,6 +94,11 @@ public: void Execute_BoundingBox(u32 op, u32 diff); void Execute_BlockTransferStart(u32 op, u32 diff); + void Execute_TexScaleU(u32 op, u32 diff); + void Execute_TexScaleV(u32 op, u32 diff); + void Execute_TexOffsetU(u32 op, u32 diff); + void Execute_TexOffsetV(u32 op, u32 diff); + void Execute_WorldMtxNum(u32 op, u32 diff); void Execute_WorldMtxData(u32 op, u32 diff); void Execute_ViewMtxNum(u32 op, u32 diff); @@ -178,6 +191,13 @@ public: } protected: + void SetDrawType(DrawType type) { + if (type != lastDraw_) { + gstate_c.Dirty(DIRTY_UVSCALEOFFSET); + lastDraw_ = type; + } + } + virtual void InitClearInternal() {} virtual void BeginFrameInternal() {} virtual void CopyDisplayToOutputInternal() {} @@ -262,6 +282,7 @@ protected: bool dumpThisFrame_; bool interruptsEnabled_; bool resized_; + DrawType lastDraw_; private: diff --git a/GPU/Vulkan/GPU_Vulkan.cpp b/GPU/Vulkan/GPU_Vulkan.cpp index 9973f917b..d91f0b6e6 100644 --- a/GPU/Vulkan/GPU_Vulkan.cpp +++ b/GPU/Vulkan/GPU_Vulkan.cpp @@ -85,10 +85,10 @@ static const CommandTableEntry commandTable[] = { // Changes that dirty texture scaling. { GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_UVSCALEOFFSET }, - { GE_CMD_TEXSCALEU, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_Vulkan::Execute_TexScaleU }, - { GE_CMD_TEXSCALEV, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_Vulkan::Execute_TexScaleV }, - { GE_CMD_TEXOFFSETU, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_Vulkan::Execute_TexOffsetU }, - { GE_CMD_TEXOFFSETV, FLAG_EXECUTEONCHANGE, DIRTY_UVSCALEOFFSET, &GPU_Vulkan::Execute_TexOffsetV }, + { GE_CMD_TEXSCALEU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleU }, + { GE_CMD_TEXSCALEV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexScaleV }, + { GE_CMD_TEXOFFSETU, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetU }, + { GE_CMD_TEXOFFSETV, FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_TexOffsetV }, // Changes that dirty the current texture. { GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPU_Vulkan::Execute_TexSize0 }, @@ -767,6 +767,8 @@ void GPU_Vulkan::Execute_Iaddr(u32 op, u32 diff) { } void GPU_Vulkan::Execute_Prim(u32 op, u32 diff) { + SetDrawType(DRAW_PRIM); + // This drives all drawing. All other state we just buffer up, then we apply it only // when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization. @@ -840,6 +842,8 @@ void GPU_Vulkan::Execute_VertexType(u32 op, u32 diff) { } void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) { + SetDrawType(DRAW_BEZIER); + // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -883,6 +887,8 @@ void GPU_Vulkan::Execute_Bezier(u32 op, u32 diff) { } void GPU_Vulkan::Execute_Spline(u32 op, u32 diff) { + SetDrawType(DRAW_SPLINE); + // This also make skipping drawing very effective. framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason); if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) { @@ -928,26 +934,6 @@ void GPU_Vulkan::Execute_Spline(u32 op, u32 diff) { AdvanceVerts(gstate.vertType, count, bytesRead); } -void GPU_Vulkan::Execute_TexScaleU(u32 op, u32 diff) { - gstate_c.uv.uScale = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_Vulkan::Execute_TexScaleV(u32 op, u32 diff) { - gstate_c.uv.vScale = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_Vulkan::Execute_TexOffsetU(u32 op, u32 diff) { - gstate_c.uv.uOff = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - -void GPU_Vulkan::Execute_TexOffsetV(u32 op, u32 diff) { - gstate_c.uv.vOff = getFloat24(op); - gstate_c.Dirty(DIRTY_UVSCALEOFFSET); -} - void GPU_Vulkan::Execute_TexSize0(u32 op, u32 diff) { // Render to texture may have overridden the width/height. // Don't reset it unless the size is different / the texture has changed. diff --git a/GPU/Vulkan/GPU_Vulkan.h b/GPU/Vulkan/GPU_Vulkan.h index 4bf10b297..5ccd08921 100644 --- a/GPU/Vulkan/GPU_Vulkan.h +++ b/GPU/Vulkan/GPU_Vulkan.h @@ -79,10 +79,6 @@ public: void Execute_Bezier(u32 op, u32 diff); void Execute_Spline(u32 op, u32 diff); void Execute_VertexType(u32 op, u32 diff); - void Execute_TexScaleU(u32 op, u32 diff); - void Execute_TexScaleV(u32 op, u32 diff); - void Execute_TexOffsetU(u32 op, u32 diff); - void Execute_TexOffsetV(u32 op, u32 diff); void Execute_TexSize0(u32 op, u32 diff); void Execute_LoadClut(u32 op, u32 diff); void Execute_BoneMtxNum(u32 op, u32 diff);