From 1e65a691f4154e94625dc9e359b0b267451eb57a Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 27 Oct 2013 14:43:58 -0700 Subject: [PATCH 1/3] Cap the number of vertexes per flush. Might not be realistic, but we crash if we go over. Pretty unlikely to happen in real games, but I suppose not impossible. Happens in the vertex speed demo (#3106.) --- GPU/Directx9/TransformPipelineDX9.cpp | 12 ++++++++---- GPU/Directx9/TransformPipelineDX9.h | 1 + GPU/GLES/TransformPipeline.cpp | 12 ++++++++---- GPU/GLES/TransformPipeline.h | 1 + 4 files changed, 18 insertions(+), 8 deletions(-) diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 47b7dde2e9..af9ddaf3f4 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -69,9 +69,10 @@ int D3DPrimCount(D3DPRIMITIVETYPE prim, int size) { } enum { - DECODED_VERTEX_BUFFER_SIZE = 65536 * 48, - DECODED_INDEX_BUFFER_SIZE = 65536 * 20, - TRANSFORMED_VERTEX_BUFFER_SIZE = 65536 * sizeof(TransformedVertex) + VERTEX_BUFFER_MAX = 65536, + DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 48, + DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 20, + TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex) }; @@ -90,6 +91,7 @@ TransformDrawEngineDX9::TransformDrawEngineDX9() textureCache_(0), framebufferManager_(0), numDrawCalls(0), + vertexCountInDrawCalls(0), uvScale(0) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; // Allocate nicely aligned memory. Maybe graphics drivers will @@ -914,7 +916,7 @@ void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType if (vertexCount == 0) return; // we ignore zero-sized draw calls. - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS) + if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX) Flush(); // TODO: Is this the right thing to do? @@ -951,6 +953,7 @@ void TransformDrawEngineDX9::SubmitPrim(void *verts, void *inds, GEPrimitiveType uvScale[numDrawCalls] = gstate_c.uv; } numDrawCalls++; + vertexCountInDrawCalls += vertexCount; } void TransformDrawEngineDX9::DecodeVerts() { @@ -1330,6 +1333,7 @@ rotateVBO: indexGen.Reset(); collectedVerts = 0; numDrawCalls = 0; + vertexCountInDrawCalls = 0; prevPrim_ = GE_PRIM_INVALID; #ifndef _XBOX diff --git a/GPU/Directx9/TransformPipelineDX9.h b/GPU/Directx9/TransformPipelineDX9.h index 352cb237d1..ad42803697 100644 --- a/GPU/Directx9/TransformPipelineDX9.h +++ b/GPU/Directx9/TransformPipelineDX9.h @@ -186,6 +186,7 @@ private: enum { MAX_DEFERRED_DRAW_CALLS = 128 }; DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS]; int numDrawCalls; + int vertexCountInDrawCalls; int decimationCounter_; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 4a0927ef91..cea9ed579e 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -99,9 +99,10 @@ const GLuint glprim[8] = { }; enum { - DECODED_VERTEX_BUFFER_SIZE = 65536 * 48, - DECODED_INDEX_BUFFER_SIZE = 65536 * 20, - TRANSFORMED_VERTEX_BUFFER_SIZE = 65536 * sizeof(TransformedVertex) + VERTEX_BUFFER_MAX = 65536, + DECODED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 48, + DECODED_INDEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * 20, + TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex) }; #define QUAD_INDICES_MAX 32768 @@ -123,6 +124,7 @@ TransformDrawEngine::TransformDrawEngine() textureCache_(0), framebufferManager_(0), numDrawCalls(0), + vertexCountInDrawCalls(0), uvScale(0) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; // Allocate nicely aligned memory. Maybe graphics drivers will @@ -914,7 +916,7 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, GEPrimitiveType pr if (vertexCount == 0) return; // we ignore zero-sized draw calls. - if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS) + if (!indexGen.PrimCompatible(prevPrim_, prim) || numDrawCalls >= MAX_DEFERRED_DRAW_CALLS || vertexCountInDrawCalls + vertexCount > VERTEX_BUFFER_MAX) Flush(); // TODO: Is this the right thing to do? @@ -951,6 +953,7 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, GEPrimitiveType pr uvScale[numDrawCalls] = gstate_c.uv; } numDrawCalls++; + vertexCountInDrawCalls += vertexCount; } void TransformDrawEngine::DecodeVerts() { @@ -1326,6 +1329,7 @@ rotateVBO: indexGen.Reset(); collectedVerts = 0; numDrawCalls = 0; + vertexCountInDrawCalls = 0; prevPrim_ = GE_PRIM_INVALID; #ifndef USING_GLES2 diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 0cb0866a0f..12d48f94c7 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -191,6 +191,7 @@ private: enum { MAX_DEFERRED_DRAW_CALLS = 128 }; DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS]; int numDrawCalls; + int vertexCountInDrawCalls; int decimationCounter_; From 98e257c95da04de4985160f80c5362281fd0bf41 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 27 Oct 2013 15:05:57 -0700 Subject: [PATCH 2/3] Add GPU cycle estimation to the softgpu. --- GPU/Software/SoftGpu.cpp | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 54e6585e07..9ada32d821 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -268,6 +268,32 @@ void SoftGPU::FastRunLoop(DisplayList &list) { } } +int EstimatePerVertexCost() { + // TODO: This is transform cost, also account for rasterization cost somehow... although it probably + // runs in parallel with transform. + + // Also, this is all pure guesswork. If we can find a way to do measurements, that would be great. + + // GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things + // went too fast and starts doing all the work over again). + + int cost = 20; + if (gstate.isLightingEnabled()) { + cost += 10; + } + + for (int i = 0; i < 4; i++) { + if (gstate.isLightChanEnabled(i)) + cost += 10; + } + if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) { + cost += 20; + } + // TODO: morphcount + + return cost; +} + void SoftGPU::ExecuteOp(u32 op, u32 diff) { u32 cmd = op >> 24; @@ -303,6 +329,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) if (type != GE_PRIM_TRIANGLES && type != GE_PRIM_TRIANGLE_STRIP && type != GE_PRIM_TRIANGLE_FAN && type != GE_PRIM_RECTANGLES) { ERROR_LOG_REPORT(G3D, "Software: DL DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type<7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr); + cyclesExecuted += EstimatePerVertexCost() * count; break; } @@ -321,6 +348,7 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) indices = Memory::GetPointer(gstate_c.indexAddr); } + cyclesExecuted += EstimatePerVertexCost() * count; TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType); } break; From b62e36e997f93d7361452edec13e47a3eac60292 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 27 Oct 2013 16:50:03 -0700 Subject: [PATCH 3/3] Support frameskip in softgpu. --- GPU/Software/SoftGpu.cpp | 8 ++++++-- GPU/Software/SoftGpu.h | 4 ++++ 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/GPU/Software/SoftGpu.cpp b/GPU/Software/SoftGpu.cpp index 9ada32d821..24044d6e9c 100644 --- a/GPU/Software/SoftGpu.cpp +++ b/GPU/Software/SoftGpu.cpp @@ -349,7 +349,9 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) } cyclesExecuted += EstimatePerVertexCost() * count; - TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType); + if (!(gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME)) { + TransformUnit::SubmitPrimitive(verts, indices, type, count, gstate.vertType); + } } break; @@ -389,7 +391,9 @@ void SoftGPU::ExecuteOp(u32 op, u32 diff) break; } - TransformUnit::SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, gstate.getPatchPrimitiveType(), gstate.vertType); + if (!(gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME)) { + TransformUnit::SubmitSpline(control_points, indices, sp_ucount, sp_vcount, sp_utype, sp_vtype, gstate.getPatchPrimitiveType(), gstate.vertType); + } DEBUG_LOG(G3D,"DL DRAW SPLINE: %i x %i, %i x %i", sp_ucount, sp_vcount, sp_utype, sp_vtype); } break; diff --git a/GPU/Software/SoftGpu.h b/GPU/Software/SoftGpu.h index 2359c7323e..99e4c52b12 100644 --- a/GPU/Software/SoftGpu.h +++ b/GPU/Software/SoftGpu.h @@ -73,6 +73,10 @@ public: fullInfo = "Software"; } + virtual bool FramebufferReallyDirty() { + return !(gstate_c.skipDrawReason & SKIPDRAW_SKIPFRAME); + } + virtual bool GetCurrentFramebuffer(GPUDebugBuffer &buffer); virtual bool GetCurrentDepthbuffer(GPUDebugBuffer &buffer); virtual bool GetCurrentStencilbuffer(GPUDebugBuffer &buffer);