Track number of GPU commands executed per call level.

This is to get an idea of whether it would be beneficial to compile and cache display lists. It looks like it would be for some games: for example, in GTA it's often 20k/80k/20k commands per frame - that is 100k executed commands per frame that could potentially be run from a compiled, cached form instead of being interpreted. Likewise in Wipeout. Of course, this means yet another cache with invalidation issues, etc.
This commit is contained in:
Henrik Rydgard 2013-08-23 11:26:13 +02:00
parent 89e476d9f2
commit dfb91d4532
3 changed files with 9 additions and 2 deletions

View File

@ -274,6 +274,7 @@ void __DisplayGetDebugStats(char stats[2048]) {
"Cached Draw calls: %i\n"
"Num Tracked Vertex Arrays: %i\n"
"Cycles executed: %d (%f per vertex)\n"
"Commands per call level: %i %i %i %i\n"
"Vertices Submitted: %i\n"
"Cached Vertices Drawn: %i\n"
"Uncached Vertices Drawn: %i\n"
@ -296,6 +297,7 @@ void __DisplayGetDebugStats(char stats[2048]) {
gpuStats.numTrackedVertexArrays,
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
vertexAverageCycles,
gpuStats.gpuCommandsAtCallLevel[0],gpuStats.gpuCommandsAtCallLevel[1],gpuStats.gpuCommandsAtCallLevel[2],gpuStats.gpuCommandsAtCallLevel[3],
gpuStats.numVertsSubmitted,
gpuStats.numCachedVertsDrawn,
gpuStats.numUncachedVertsDrawn,

View File

@ -493,10 +493,13 @@ void GPUCommon::SlowRunLoop(DisplayList &list)
// Charges the GPU commands executed since the last PC checkpoint to the cycle
// counters and to the per-call-level command statistic, then moves the checkpoint.
//
// currentPC: address execution has reached; the span from cycleLastPC up to
//            currentPC is what gets accounted for.
// newPC:     used for jumps, we don't count cycles between currentPC and newPC.
//            Pass 0 when there is no jump; the checkpoint then stays at currentPC.
inline void GPUCommon::UpdatePC(u32 currentPC, u32 newPC) {
	const u32 pcDelta = currentPC - cycleLastPC;

	// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
	// The PC delta is divided by 4 to turn it into an instruction count.
	const int cycles = 2 * pcDelta / 4;
	cyclesExecuted += cycles;
	gpuStats.otherGPUCycles += cycles;

	// This statistic is reported as "Commands per call level", so count commands
	// here, not ticks: leave out the 2x factor used for the cycle estimate above.
	// Call levels of 3 and deeper all share the last bucket.
	const int commands = pcDelta / 4;
	gpuStats.gpuCommandsAtCallLevel[std::min(currentList->stackptr, 3)] += commands;

	cycleLastPC = newPC == 0 ? currentPC : newPC;

	// Exit the runloop and recalculate things. This isn't common.
	downcount = 0;
}

View File

@ -432,6 +432,7 @@ struct GPUStatistics {
msProcessingDisplayLists = 0;
vertexGPUCycles = 0;
otherGPUCycles = 0;
memset(gpuCommandsAtCallLevel, 0, sizeof(gpuCommandsAtCallLevel));
}
// Per frame statistics
@ -449,6 +450,7 @@ struct GPUStatistics {
double msProcessingDisplayLists;
int vertexGPUCycles;
int otherGPUCycles;
int gpuCommandsAtCallLevel[4];
// Total statistics, updated by the GPU core in UpdateStats
int numVBlanks;