From dfb91d4532faadda92d38354b70009d577543631 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Fri, 23 Aug 2013 11:26:13 +0200 Subject: [PATCH] Track number of GPU commands executed per call level. This is to get an idea of whether it would be beneficial to compile and cache display lists. Looks like it would be for some games, for example in GTA it's often 20k/80k/20k commands per frame - that is 100k executed commands per frame that could potentially be compiled once and executed directly instead of being interpreted. Likewise in Wipeout. Of course, this means yet another cache with invalidation issues etc. --- Core/HLE/sceDisplay.cpp | 2 ++ GPU/GPUCommon.cpp | 7 +++++-- GPU/GPUState.h | 2 ++ 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/Core/HLE/sceDisplay.cpp b/Core/HLE/sceDisplay.cpp index 54f7dc3e0..922ba5874 100644 --- a/Core/HLE/sceDisplay.cpp +++ b/Core/HLE/sceDisplay.cpp @@ -274,6 +274,7 @@ void __DisplayGetDebugStats(char stats[2048]) { "Cached Draw calls: %i\n" "Num Tracked Vertex Arrays: %i\n" "Cycles executed: %d (%f per vertex)\n" + "Commands per call level: %i %i %i %i\n" "Vertices Submitted: %i\n" "Cached Vertices Drawn: %i\n" "Uncached Vertices Drawn: %i\n" @@ -296,6 +297,7 @@ void __DisplayGetDebugStats(char stats[2048]) { gpuStats.numTrackedVertexArrays, gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles, vertexAverageCycles, + gpuStats.gpuCommandsAtCallLevel[0],gpuStats.gpuCommandsAtCallLevel[1],gpuStats.gpuCommandsAtCallLevel[2],gpuStats.gpuCommandsAtCallLevel[3], gpuStats.numVertsSubmitted, gpuStats.numCachedVertsDrawn, gpuStats.numUncachedVertsDrawn, diff --git a/GPU/GPUCommon.cpp b/GPU/GPUCommon.cpp index b886bb7eb..830713434 100644 --- a/GPU/GPUCommon.cpp +++ b/GPU/GPUCommon.cpp @@ -493,10 +493,13 @@ void GPUCommon::SlowRunLoop(DisplayList &list) // The newPC parameter is used for jumps, we don't count cycles between. 
inline void GPUCommon::UpdatePC(u32 currentPC, u32 newPC) { // Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction. - cyclesExecuted += 2 * (currentPC - cycleLastPC) / 4; - gpuStats.otherGPUCycles += 2 * (currentPC - cycleLastPC) / 4; + int executed = 2 * (currentPC - cycleLastPC) / 4; + cyclesExecuted += executed; + gpuStats.otherGPUCycles += executed; cycleLastPC = newPC == 0 ? currentPC : newPC; + gpuStats.gpuCommandsAtCallLevel[std::min(currentList->stackptr, 3)] += executed; + // Exit the runloop and recalculate things. This isn't common. downcount = 0; } diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 3721fec64..f63424d31 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -432,6 +432,7 @@ struct GPUStatistics { msProcessingDisplayLists = 0; vertexGPUCycles = 0; otherGPUCycles = 0; + memset(gpuCommandsAtCallLevel, 0, sizeof(gpuCommandsAtCallLevel)); } // Per frame statistics @@ -449,6 +450,7 @@ struct GPUStatistics { double msProcessingDisplayLists; int vertexGPUCycles; int otherGPUCycles; + int gpuCommandsAtCallLevel[4]; // Total statistics, updated by the GPU core in UpdateStats int numVBlanks;