mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Cache vertex decoders. No big boost now, but opens for optimizing them more, and lets us keep stats of the most common ones.
This commit is contained in:
parent
15e3a7750f
commit
d96c588288
@ -55,6 +55,7 @@ enum {
|
||||
TransformDrawEngine::TransformDrawEngine()
|
||||
: collectedVerts(0),
|
||||
prevPrim_(-1),
|
||||
dec_(0),
|
||||
lastVType_(-1),
|
||||
curVbo_(0),
|
||||
shaderManager_(0),
|
||||
@ -142,8 +143,9 @@ void TransformDrawEngine::DrawBezier(int ucount, int vcount) {
|
||||
}
|
||||
|
||||
if (!(gstate.vertType & GE_VTYPE_TC_MASK)) {
|
||||
dec.SetVertexType(gstate.vertType);
|
||||
u32 newVertType = dec.InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
|
||||
VertexDecoder *dec = GetVertexDecoder(gstate.vertType);
|
||||
dec->SetVertexType(gstate.vertType);
|
||||
u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
|
||||
SubmitPrim(decoded2, &indices[0], GE_PRIM_TRIANGLES, c, newVertType, GE_VTYPE_IDX_16BIT, 0);
|
||||
} else {
|
||||
SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0);
|
||||
@ -184,8 +186,9 @@ void TransformDrawEngine::DrawSpline(int ucount, int vcount, int utype, int vtyp
|
||||
}
|
||||
|
||||
if (!(gstate.vertType & GE_VTYPE_TC_MASK)) {
|
||||
dec.SetVertexType(gstate.vertType);
|
||||
u32 newVertType = dec.InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
|
||||
VertexDecoder *dec = GetVertexDecoder(gstate.vertType);
|
||||
dec->SetVertexType(gstate.vertType);
|
||||
u32 newVertType = dec->InjectUVs(decoded2, Memory::GetPointer(gstate_c.vertexAddr), customUV, 16);
|
||||
SubmitPrim(decoded2, &indices[0], GE_PRIM_TRIANGLES, c, newVertType, GE_VTYPE_IDX_16BIT, 0);
|
||||
} else {
|
||||
SubmitPrim(Memory::GetPointer(gstate_c.vertexAddr), &indices[0], GE_PRIM_TRIANGLES, c, gstate.vertType, GE_VTYPE_IDX_16BIT, 0);
|
||||
@ -762,6 +765,16 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the cached decoder for vtype, creating and caching a new one the
// first time a given vertex type is requested.
// The returned pointer stays stored in decoderMap_ and is handed out again on
// later calls, so callers must not delete it.
VertexDecoder *TransformDrawEngine::GetVertexDecoder(u32 vtype) {
	// lower_bound yields either the existing entry or the position where the
	// new entry belongs, so a cache miss does not search the map a second time.
	auto pos = decoderMap_.lower_bound(vtype);
	if (pos != decoderMap_.end() && pos->first == vtype)
		return pos->second;

	VertexDecoder *dec = new VertexDecoder();
	dec->SetVertexType(vtype);
	// Make sure the per-decoder counters start from zero before SubmitPrim
	// begins incrementing them.
	// NOTE(review): redundant if the VertexDecoder constructor already clears stats_ - confirm.
	dec->ResetStats();
	decoderMap_.insert(pos, std::map<u32, VertexDecoder *>::value_type(vtype, dec));
	return dec;
}
|
||||
|
||||
void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vertexCount, u32 vertType, int forceIndexType, int *bytesRead) {
|
||||
if (vertexCount == 0)
|
||||
return; // we ignore zero-sized draw calls.
|
||||
@ -772,16 +785,14 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert
|
||||
// If vtype has changed, setup the vertex decoder.
|
||||
// TODO: Simply cache the setup decoders instead.
|
||||
if (vertType != lastVType_) {
|
||||
dec.SetVertexType(vertType);
|
||||
dec_ = GetVertexDecoder(vertType);
|
||||
lastVType_ = vertType;
|
||||
}
|
||||
|
||||
if (bytesRead)
|
||||
*bytesRead = vertexCount * dec.VertexSize();
|
||||
dec_->IncrementStat(STAT_VERTSSUBMITTED, vertexCount);
|
||||
|
||||
if (!indexGen.Empty()) {
|
||||
gpuStats.numJoins++;
|
||||
}
|
||||
if (bytesRead)
|
||||
*bytesRead = vertexCount * dec_->VertexSize();
|
||||
|
||||
gpuStats.numDrawCalls++;
|
||||
gpuStats.numVertsSubmitted += vertexCount;
|
||||
@ -812,7 +823,7 @@ void TransformDrawEngine::DecodeVerts() {
|
||||
void *inds = dc.inds;
|
||||
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
|
||||
// Decode the verts and apply morphing. Simple.
|
||||
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride,
|
||||
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
|
||||
dc.verts, indexLowerBound, indexUpperBound);
|
||||
collectedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
indexGen.AddPrim(dc.prim, dc.vertexCount);
|
||||
@ -848,7 +859,7 @@ void TransformDrawEngine::DecodeVerts() {
|
||||
}
|
||||
|
||||
// 3. Decode that range of vertex data.
|
||||
dec.DecodeVerts(decoded + collectedVerts * (int)dec.GetDecVtxFmt().stride,
|
||||
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
|
||||
dc.verts, indexLowerBound, indexUpperBound);
|
||||
collectedVerts += indexUpperBound - indexLowerBound + 1;
|
||||
|
||||
@ -861,7 +872,7 @@ void TransformDrawEngine::DecodeVerts() {
|
||||
|
||||
u32 TransformDrawEngine::ComputeHash() {
|
||||
u32 fullhash = 0;
|
||||
int vertexSize = dec.GetDecVtxFmt().stride;
|
||||
int vertexSize = dec_->GetDecVtxFmt().stride;
|
||||
|
||||
// TODO: Add some caps both for numDrawCalls and num verts to check?
|
||||
for (int i = 0; i < numDrawCalls; i++) {
|
||||
@ -870,7 +881,7 @@ u32 TransformDrawEngine::ComputeHash() {
|
||||
} else {
|
||||
fullhash += CityHash32((const char *)drawCalls[i].verts + vertexSize * drawCalls[i].indexLowerBound,
|
||||
vertexSize * (drawCalls[i].indexUpperBound - drawCalls[i].indexLowerBound));
|
||||
int indexSize = (dec.VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
|
||||
int indexSize = (dec_->VertexType() & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_16BIT ? 2 : 1;
|
||||
fullhash += CityHash32((const char *)drawCalls[i].inds, indexSize * drawCalls[i].vertexCount);
|
||||
}
|
||||
}
|
||||
@ -913,6 +924,17 @@ void TransformDrawEngine::DecimateTrackedVertexArrays() {
|
||||
else
|
||||
++iter;
|
||||
}
|
||||
|
||||
// Enable if you want to see vertex decoders in the log output. Need a better way.
|
||||
#if 0
|
||||
char buffer[16384];
|
||||
for (std::map<u32, VertexDecoder*>::iterator dec = decoderMap_.begin(); dec != decoderMap_.end(); ++dec) {
|
||||
char *ptr = buffer;
|
||||
ptr += dec->second->ToString(ptr);
|
||||
// *ptr++ = '\n';
|
||||
NOTICE_LOG(HLE, buffer);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
VertexArrayInfo::~VertexArrayInfo() {
|
||||
@ -952,7 +974,7 @@ void TransformDrawEngine::Flush() {
|
||||
vai = iter->second;
|
||||
} else {
|
||||
vai = new VertexArrayInfo();
|
||||
vai->decFmt = dec.GetDecVtxFmt();
|
||||
vai->decFmt = dec_->GetDecVtxFmt();
|
||||
vai_[id] = vai;
|
||||
}
|
||||
|
||||
@ -1015,7 +1037,7 @@ void TransformDrawEngine::Flush() {
|
||||
|
||||
glGenBuffers(1, &vai->vbo);
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vai->vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STATIC_DRAW);
|
||||
glBufferData(GL_ARRAY_BUFFER, dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STATIC_DRAW);
|
||||
// If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS,
|
||||
// there is no need for the index buffer we built. We can then use glDrawArrays instead
|
||||
// for a very minor speed boost.
|
||||
@ -1087,7 +1109,7 @@ rotateVBO:
|
||||
if (curVbo_ == NUM_VBOS)
|
||||
curVbo_ = 0;
|
||||
glBindBuffer(GL_ARRAY_BUFFER, vbo);
|
||||
glBufferData(GL_ARRAY_BUFFER, dec.GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STREAM_DRAW);
|
||||
glBufferData(GL_ARRAY_BUFFER, dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(), decoded, GL_STREAM_DRAW);
|
||||
if (useElements) {
|
||||
glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, ebo);
|
||||
glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(short) * vertexCount, (GLvoid *)decIndex, GL_STREAM_DRAW);
|
||||
@ -1101,7 +1123,7 @@ rotateVBO:
|
||||
|
||||
DEBUG_LOG(G3D, "Flush prim %i! %i verts in one go", prim, vertexCount);
|
||||
|
||||
SetupDecFmtForDraw(program, dec.GetDecVtxFmt(), vbo ? 0 : decoded);
|
||||
SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), vbo ? 0 : decoded);
|
||||
if (useElements) {
|
||||
glDrawElements(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, ebo ? 0 : (GLvoid*)decIndex);
|
||||
if (ebo)
|
||||
@ -1117,7 +1139,9 @@ rotateVBO:
|
||||
prim = indexGen.Prim();
|
||||
DEBUG_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
|
||||
|
||||
SoftwareTransformAndDraw(prim, decoded, program, indexGen.VertexCount(), dec.VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec.GetDecVtxFmt(),
|
||||
SoftwareTransformAndDraw(
|
||||
prim, decoded, program, indexGen.VertexCount(),
|
||||
dec_->VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(),
|
||||
indexGen.MaxIndex());
|
||||
}
|
||||
|
||||
|
@ -17,6 +17,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <map>
|
||||
|
||||
#include "IndexGenerator.h"
|
||||
#include "VertexDecoder.h"
|
||||
#include "gfx/gl_lost_manager.h"
|
||||
@ -125,6 +127,8 @@ private:
|
||||
u32 ComputeFastDCID();
|
||||
u32 ComputeHash(); // Reads deferred vertex data.
|
||||
|
||||
VertexDecoder *GetVertexDecoder(u32 vtype);
|
||||
|
||||
// Defer all vertex decoding to a Flush, so that we can hash and cache the
|
||||
// generated buffers without having to redecode them every time.
|
||||
struct DeferredDrawCall {
|
||||
@ -143,9 +147,12 @@ private:
|
||||
int collectedVerts;
|
||||
int prevPrim_;
|
||||
|
||||
// Vertex collector buffers
|
||||
VertexDecoder dec;
|
||||
// Cached vertex decoders
|
||||
std::map<u32, VertexDecoder *> decoderMap_;
|
||||
VertexDecoder *dec_;
|
||||
u32 lastVType_;
|
||||
|
||||
// Vertex collector buffers
|
||||
u8 *decoded;
|
||||
u16 *decIndex;
|
||||
|
||||
|
@ -750,3 +750,26 @@ u32 VertexDecoder::InjectUVs(u8 *decoded, const void *verts, float *customuv, in
|
||||
}
|
||||
return customVertType;
|
||||
}
|
||||
|
||||
int VertexDecoder::ToString(char *output) const {
|
||||
char * start = output;
|
||||
output += sprintf(output, "P: %i ", pos);
|
||||
if (nrm)
|
||||
output += sprintf(output, "N: %i ", nrm);
|
||||
if (col)
|
||||
output += sprintf(output, "C: %i ", col);
|
||||
if (tc)
|
||||
output += sprintf(output, "T: %i ", tc);
|
||||
if (weighttype)
|
||||
output += sprintf(output, "W: %i ", weighttype);
|
||||
if (idx)
|
||||
output += sprintf(output, "I: %i ", idx);
|
||||
if (morphcount > 1)
|
||||
output += sprintf(output, "Morph: %i ", morphcount);
|
||||
output += sprintf(output, "Verts: %i ", stats_[STAT_VERTSSUBMITTED]);
|
||||
if (throughmode)
|
||||
output += sprintf(output, " (through)");
|
||||
|
||||
output += sprintf(output, " (size: %i)", VertexSize());
|
||||
return output - start;
|
||||
}
|
@ -76,6 +76,11 @@ typedef void (VertexDecoder::*StepFunction)() const;
|
||||
|
||||
void GetIndexBounds(void *inds, int count, u32 vertType, u16 *indexLowerBound, u16 *indexUpperBound);
|
||||
|
||||
// Indices into VertexDecoder::stats_.
enum {
	STAT_VERTSSUBMITTED = 0,
	// Add new stats above this line; the count below then follows automatically
	// instead of having to be bumped by hand.
	NUM_VERTEX_DECODER_STATS
};
|
||||
|
||||
// Right now
|
||||
// - only contains computed information
|
||||
// - does decoding in nasty branchfilled loops
|
||||
@ -147,6 +152,18 @@ public:
|
||||
void Step_PosS16Through() const;
|
||||
void Step_PosFloatThrough() const;
|
||||
|
||||
void ResetStats() {
|
||||
memset(stats_, 0, sizeof(stats_));
|
||||
}
|
||||
|
||||
void IncrementStat(int stat, int amount) {
|
||||
stats_[stat] += amount;
|
||||
}
|
||||
|
||||
// output must point to a buffer large enough for the whole string; no bounds checking is done.
|
||||
// Returns number of chars written.
|
||||
// Ugly for speed.
|
||||
int ToString(char *output) const;
|
||||
|
||||
// Mutable decoder state
|
||||
mutable u8 *decoded_;
|
||||
@ -180,6 +197,8 @@ public:
|
||||
int idx;
|
||||
int morphcount;
|
||||
int nweights;
|
||||
|
||||
int stats_[NUM_VERTEX_DECODER_STATS];
|
||||
};
|
||||
|
||||
// Reads decoded vertex formats in a convenient way. For software transform and debugging.
|
||||
|
@ -282,7 +282,6 @@ struct GPUStatistics
|
||||
memset(this, 0, sizeof(*this));
|
||||
}
|
||||
void resetFrame() {
|
||||
numJoins = 0;
|
||||
numDrawCalls = 0;
|
||||
numCachedDrawCalls = 0;
|
||||
numVertsSubmitted = 0;
|
||||
@ -298,7 +297,6 @@ struct GPUStatistics
|
||||
}
|
||||
|
||||
// Per frame statistics
|
||||
int numJoins;
|
||||
int numDrawCalls;
|
||||
int numCachedDrawCalls;
|
||||
int numFlushes;
|
||||
|
@ -52,7 +52,10 @@ void GeDisassembleOp(u32 pc, u32 op, u32 prev, char *buffer) {
|
||||
"TRIANGLE_FAN",
|
||||
"RECTANGLES",
|
||||
};
|
||||
sprintf(buffer, "DrawPrim type: %s count: %i vaddr= %08x, iaddr= %08x", type < 7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
|
||||
if (gstate.vertType & GE_VTYPE_IDX_MASK)
|
||||
sprintf(buffer, "DrawPrim indexed type: %s count: %i vaddr= %08x, iaddr= %08x", type < 7 ? types[type] : "INVALID", count, gstate_c.vertexAddr, gstate_c.indexAddr);
|
||||
else
|
||||
sprintf(buffer, "DrawPrim type: %s count: %i vaddr= %08x", type < 7 ? types[type] : "INVALID", count, gstate_c.vertexAddr);
|
||||
}
|
||||
break;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user