Cache vertex decoders. No big speed boost yet, but this opens the door to optimizing them further, and lets us keep stats on the most common ones.

This commit is contained in:
Henrik Rydgard 2013-03-24 12:28:42 +01:00
parent 15e3a7750f
commit d96c588288
6 changed files with 98 additions and 24 deletions

View File

@ -55,6 +55,7 @@ enum {
TransformDrawEngine::TransformDrawEngine()
: collectedVerts(0),
prevPrim_(-1),
dec_(0),
lastVType_(-1),
curVbo_(0),
shaderManager_(0),
@ -142,8 +143,9 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) {
}
if (!(gstate.vertType & GE_VTYPE_TC_MASK)) {
dec.SetVertexType(gstate.vertType);
u32 newVertType = dec.InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
VertexDecoder *dec = GetVertexDecoder(gstate.vertType);
dec->SetVertexType(gstate.vertType);
u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
SubmitPrim(decoded2, &indices[0], GE_PRIM_TRIANGLES, c, newVertType, GE_VTYPE_IDX_16BIT, 0);
} else {
SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0);
@ -184,8 +186,9 @@ void TransformDrawEngine::DrawSpline(int ucount, int vcount, int utype, int vtyp
}
if (!(gstate.vertType & GE_VTYPE_TC_MASK)) {
dec.SetVertexType(gstate.vertType);
u32 newVertType = dec.InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
VertexDecoder *dec = GetVertexDecoder(gstate.vertType);
dec->SetVertexType(gstate.vertType);
u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
SubmitPrim(decoded2, &indices[0], GE_PRIM_TRIANGLES, c, newVertType, GE_VTYPE_IDX_16BIT, 0);
} else {
SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0);
@ -762,6 +765,16 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
}
}
// Returns the decoder set up for vtype. The first request for a given vtype
// creates a decoder, configures it and stores it in decoderMap_; later
// requests return that same cached pointer.
// NOTE(review): nothing visible here frees the cached decoders - confirm the
// owning class cleans up decoderMap_ on destruction.
VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) {
	auto iter = decoderMap_.find(vtype);
	if (iter != decoderMap_.end())
		return iter->second;

	VertexDecoder *dec = new VertexDecoder();
	// Start the per-decoder counters from zero: no other visible code path
	// clears stats_ before IncrementStat() adds to it or ToString() prints it.
	dec->ResetStats();
	dec->SetVertexType(vtype);
	decoderMap_[vtype] = dec;
	return dec;
}
void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead) {
if (vertexCount == 0)
return; // we ignore zero-sized draw calls.
@ -772,16 +785,14 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert
// If vtype has changed, setup the vertex decoder.
// Set-up decoders are cached per vertex type, so a repeated type is only a lookup.
if (vertType != lastVType_) {
dec.SetVertexType(vertType);
dec_ = GetVertexDecoder(vertType);
lastVType_ = vertType;
}
if (bytesRead)
*bytesRead = vertexCount * dec.VertexSize();
dec_->IncrementStat(STAT_VERTSSUBMITTED, vertexCount);
if (!indexGen.Empty()) {
gpuStats.numJoins++;
}
if (bytesRead)
*bytesRead = vertexCount * dec_->VertexSize();
gpuStats.numDrawCalls++;
gpuStats.numVertsSubmitted += vertexCount;
@ -812,7 +823,7 @@ void TransformDrawEngine::DecodeVerts() {
void *inds = dc.inds;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride,
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
collectedVerts += indexUpperBound - indexLowerBound + 1;
indexGen.AddPrim(dc.prim, dc.vertexCount);
@ -848,7 +859,7 @@ void TransformDrawEngine::DecodeVerts() {
}
// 3. Decode that range of vertex data.
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride,
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
collectedVerts += indexUpperBound - indexLowerBound + 1;
@ -861,7 +872,7 @@ void TransformDrawEngine::DecodeVerts() {
u32 TransformDrawEngine::ComputeHash() {
u32 fullhash = 0;
int vertexSize = dec.GetDecVtxFmt().stride;
int vertexSize = dec_->GetDecVtxFmt().stride;
// TODO: Add some caps both for numDrawCalls and num verts to check?
for (int i = 0; i < numDrawCalls; i++) {
@ -870,7 +881,7 @@ u32 TransformDrawEngine::ComputeHash() {
} else {
fullhash += CityHash32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound));
int indexSize = (dec.VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
fullhash += CityHash32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount);
}
}
@ -913,6 +924,17 @@ void TransformDrawEngine::DecimateTrackedVertexArrays() {
else
++iter;
}
// Enable if you want to see vertex decoders in the log output. Need a better way.
#if 0
char buffer[16384];
for (std::map<u32, VertexDecoder*>::iterator dec = decoderMap_.begin(); dec != decoderMap_.end(); ++dec) {
char *ptr = buffer;
ptr += dec->second->ToString(ptr);
// *ptr++ = '\n';
NOTICE_LOG(HLE, buffer);
}
#endif
}
VertexArrayInfo::~VertexArrayInfo() {
@ -952,7 +974,7 @@ void TransformDrawEngine::Flush() {
vai = iter->second;
} else {
vai = new VertexArrayInfo();
vai->decFmt = dec.GetDecVtxFmt();
vai->decFmt = dec_->GetDecVtxFmt();
vai_[id] = vai;
}
@ -1015,7 +1037,7 @@ void TransformDrawEngine::Flush() {
glGenBuffers(1, &vai->vbo);
glBindBuffer(GL_ARRAY_BUFFER, vai->vbo);
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STATIC_DRAW);
glBufferData(GL_ARRAY_BUFFER, dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STATIC_DRAW);
// If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS,
// there is no need for the index buffer we built. We can then use glDrawArrays instead
// for a very minor speed boost.
@ -1087,7 +1109,7 @@ rotateVBO:
if (curVbo_ == NUM_VBOS)
curVbo_ = 0;
glBindBuffer(GL_ARRAY_BUFFER, vbo);
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STREAM_DRAW);
glBufferData(GL_ARRAY_BUFFER, dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STREAM_DRAW);
if (useElements) {
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(short) * vertexCount, (GLvoid *)decIndex, GL_STREAM_DRAW);
@ -1101,7 +1123,7 @@ rotateVBO:
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), vbo ? 0 : decoded);
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded);
if (useElements) {
glDrawElements(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex);
if (ebo)
@ -1117,7 +1139,9 @@ rotateVBO:
prim = indexGen.Prim();
DEBUG_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
SoftwareTransformAndDraw(prim, decoded, program, indexGen.VertexCount(), dec.VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(),
SoftwareTransformAndDraw(
prim, decoded, program, indexGen.VertexCount(),
dec_->VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(),
indexGen.MaxIndex());
}

View File

@ -17,6 +17,8 @@
#pragma once
#include <map>
#include "IndexGenerator.h"
#include "VertexDecoder.h"
#include "gfx/gl_lost_manager.h"
@ -125,6 +127,8 @@ private:
u32 ComputeFastDCID();
u32 ComputeHash(); // Reads deferred vertex data.
VertexDecoder *GetVertexDecoder(u32 vtype);
// Defer all vertex decoding to a Flush, so that we can hash and cache the
// generated buffers without having to redecode them every time.
struct DeferredDrawCall {
@ -143,9 +147,12 @@ private:
int collectedVerts;
int prevPrim_;
// Vertex collector buffers
VertexDecoder dec;
// Cached vertex decoders
std::map<u32, VertexDecoder *> decoderMap_;
VertexDecoder *dec_;
u32 lastVType_;
// Vertex collector buffers
u8 *decoded;
u16 *decIndex;

View File

@ -750,3 +750,26 @@ u32 VertexDecoder::InjectUVs(u8 *decoded, const void *verts, float *customuv, in
}
return customVertType;
}
// Formats a one-line description of this decoder into output: pos always,
// then nrm/col/tc/weighttype/idx only when nonzero, the morph count when it
// is above 1, the submitted-vertex counter, a "(through)" marker and the
// vertex size. Nothing is bounds-checked, so the caller must supply a buffer
// large enough for the whole line. Returns the number of chars written.
int VertexDecoder::ToString(char *output) const {
	char *cursor = output;

	cursor += sprintf(cursor, "P: %i ", pos);
	if (nrm) {
		cursor += sprintf(cursor, "N: %i ", nrm);
	}
	if (col) {
		cursor += sprintf(cursor, "C: %i ", col);
	}
	if (tc) {
		cursor += sprintf(cursor, "T: %i ", tc);
	}
	if (weighttype) {
		cursor += sprintf(cursor, "W: %i ", weighttype);
	}
	if (idx) {
		cursor += sprintf(cursor, "I: %i ", idx);
	}
	if (morphcount > 1) {
		cursor += sprintf(cursor, "Morph: %i ", morphcount);
	}

	cursor += sprintf(cursor, "Verts: %i ", stats_[STAT_VERTSSUBMITTED]);
	if (throughmode) {
		cursor += sprintf(cursor, " (through)");
	}
	cursor += sprintf(cursor, " (size: %i)", VertexSize());

	// Length is the distance the write cursor advanced from the buffer start.
	return static_cast<int>(cursor - output);
}

View File

@ -76,6 +76,11 @@ typedef void (VertexDecoder::*StepFunction)() const;
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
// Indices into VertexDecoder::stats_. Keep NUM_VERTEX_DECODER_STATS last so
// it always equals the number of counters listed above it.
enum {
	STAT_VERTSSUBMITTED,
	NUM_VERTEX_DECODER_STATS
};
// Right now
// - only contains computed information
// - does decoding in nasty branchfilled loops
@ -147,6 +152,18 @@ public:
void Step_PosS16Through() const;
void Step_PosFloatThrough() const;
// Sets every statistics counter in stats_ back to zero.
void ResetStats() {
	const size_t count = sizeof(stats_) / sizeof(stats_[0]);
	for (size_t i = 0; i < count; ++i)
		stats_[i] = 0;
}
// Adds amount to the counter selected by stat (one of the STAT_* indices).
// The index is not range-checked.
void IncrementStat(int stat, int amount) {
	int &counter = stats_[stat];
	counter += amount;
}
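// Writes a human-readable summary of this decoder into output.
// output must be large enough for the whole string: no length is passed in
// and nothing is bounds-checked.
// Returns the number of chars written.
// The raw-buffer interface is ugly, but fast.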
int ToString(char *output) const;
// Mutable decoder state
mutable u8 *decoded_;
@ -180,6 +197,8 @@ public:
int idx;
int morphcount;
int nweights;
int stats_[NUM_VERTEX_DECODER_STATS];
};
// Reads decoded vertex formats in a convenient way. For software transform and debugging.

View File

@ -282,7 +282,6 @@ struct GPUStatistics
memset(this, 0, sizeof(*this));
}
void resetFrame() {
numJoins = 0;
numDrawCalls = 0;
numCachedDrawCalls = 0;
numVertsSubmitted = 0;
@ -298,7 +297,6 @@ struct GPUStatistics
}
// Per frame statistics
int numJoins;
int numDrawCalls;
int numCachedDrawCalls;
int numFlushes;

View File

@ -52,7 +52,10 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer) {
"TRIANGLE_FAN",
"RECTANGLES",
};
sprintf(buffer, "DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type < 7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
if (gstate.vertType & GE_VTYPE_IDX_MASK)
sprintf(buffer, "DrawPrim indexed type: %s count: %i vaddr= %08x, iaddr= %08x", type < 7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
else
sprintf(buffer, "DrawPrim type: %s count: %i vaddr= %08x", type < 7 ? types[type] : "INVALID", count, gstate_c.vertexAddr);
}
break;