diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index d1f70a69f7..c88502fed4 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -119,7 +119,8 @@ TransformDrawEngine::TransformDrawEngine() framebufferManager_(0), numDrawCalls(0), vertexCountInDrawCalls(0), - uvScale(0) { + uvScale(0), + decodeCounter_(0) { decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; // Allocate nicely aligned memory. Maybe graphics drivers will // appreciate it. @@ -332,70 +333,11 @@ void TransformDrawEngine::DecodeVerts() { UVScale origUV; if (uvScale) origUV = gstate_c.uv; - for (int i = 0; i < numDrawCalls; i++) { - const DeferredDrawCall &dc = drawCalls[i]; - - indexGen.SetIndex(collectedVerts); - int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound; - - u32 indexType = dc.indexType; - void *inds = dc.inds; - if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { - // Decode the verts and apply morphing. Simple. - if (uvScale) - gstate_c.uv = uvScale[i]; - dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride, - dc.verts, indexLowerBound, indexUpperBound); - collectedVerts += indexUpperBound - indexLowerBound + 1; - indexGen.AddPrim(dc.prim, dc.vertexCount); - } else { - // It's fairly common that games issue long sequences of PRIM calls, with differing - // inds pointer but the same base vertex pointer. We'd like to reuse vertices between - // these as much as possible, so we make sure here to combine as many as possible - // into one nice big drawcall, sharing data. - - // 1. Look ahead to find the max index, only looking as "matching" drawcalls. - // Expand the lower and upper bounds as we go. 
- int j = i + 1; - int lastMatch = i; - while (j < numDrawCalls) { - if (drawCalls[j].verts != dc.verts) - break; - if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0) - break; - - indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound); - indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound); - lastMatch = j; - j++; - } - - // 2. Loop through the drawcalls, translating indices as we go. - for (j = i; j <= lastMatch; j++) { - switch (indexType) { - case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound); - break; - case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT: - indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16 *)drawCalls[j].inds, indexLowerBound); - break; - } - } - - int vertexCount = indexUpperBound - indexLowerBound + 1; - // 3. Decode that range of vertex data. - if (uvScale) - gstate_c.uv = uvScale[i]; - dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride, - dc.verts, indexLowerBound, indexUpperBound); - collectedVerts += vertexCount; - - // 4. Advance indexgen vertex counter. 
- indexGen.Advance(vertexCount); - i = lastMatch; - } + for (; decodeCounter_ < numDrawCalls; decodeCounter_++) { + if (uvScale) + gstate_c.uv = uvScale[decodeCounter_]; + DecodeVertsStep(); } - // Sanity check if (indexGen.Prim() < 0) { ERROR_LOG_REPORT(G3D, "DecodeVerts: Failed to deduce prim: %i", indexGen.Prim()); @@ -406,6 +348,68 @@ void TransformDrawEngine::DecodeVerts() { gstate_c.uv = origUV; } +void TransformDrawEngine::DecodeVertsStep() { + const int i = decodeCounter_; + + const DeferredDrawCall &dc = drawCalls[i]; + + indexGen.SetIndex(collectedVerts); + int indexLowerBound = dc.indexLowerBound, indexUpperBound = dc.indexUpperBound; + + u32 indexType = dc.indexType; + void *inds = dc.inds; + if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { + // Decode the verts and apply morphing. Simple. + dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride, + dc.verts, indexLowerBound, indexUpperBound); + collectedVerts += indexUpperBound - indexLowerBound + 1; + indexGen.AddPrim(dc.prim, dc.vertexCount); + } else { + // It's fairly common that games issue long sequences of PRIM calls, with differing + // inds pointer but the same base vertex pointer. We'd like to reuse vertices between + // these as much as possible, so we make sure here to combine as many as possible + // into one nice big drawcall, sharing data. + + // 1. Look ahead to find the max index, only looking at "matching" drawcalls. + // Expand the lower and upper bounds as we go. + int j = i + 1; + int lastMatch = i; + while (j < numDrawCalls) { + if (drawCalls[j].verts != dc.verts) + break; + if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0) + break; + + indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound); + indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound); + lastMatch = j; + j++; + } + + // 2. Loop through the drawcalls, translating indices as we go. 
+ for (j = i; j <= lastMatch; j++) { + switch (indexType) { + case GE_VTYPE_IDX_8BIT >> GE_VTYPE_IDX_SHIFT: + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u8 *)drawCalls[j].inds, indexLowerBound); + break; + case GE_VTYPE_IDX_16BIT >> GE_VTYPE_IDX_SHIFT: + indexGen.TranslatePrim(drawCalls[j].prim, drawCalls[j].vertexCount, (const u16 *)drawCalls[j].inds, indexLowerBound); + break; + } + } + + int vertexCount = indexUpperBound - indexLowerBound + 1; + // 3. Decode that range of vertex data. + dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride, + dc.verts, indexLowerBound, indexUpperBound); + collectedVerts += vertexCount; + + // 4. Advance indexgen vertex counter. + indexGen.Advance(vertexCount); + decodeCounter_ = lastMatch; + } +} + u32 TransformDrawEngine::ComputeHash() { u32 fullhash = 0; int vertexSize = dec_->GetDecVtxFmt().stride; @@ -720,6 +724,7 @@ rotateVBO: collectedVerts = 0; numDrawCalls = 0; vertexCountInDrawCalls = 0; + decodeCounter_ = 0; prevPrim_ = GE_PRIM_INVALID; #ifndef USING_GLES2 diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 543a8a2f8c..273cf66982 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -97,7 +97,6 @@ public: void SubmitBezier(void* control_points, void* indices, int count_u, int count_v, GEPatchPrimType prim_type, u32 vertType); bool TestBoundingBox(void* control_points, int vertexCount, u32 vertType); - void DecodeVerts(); void SetShaderManager(ShaderManager *shaderManager) { shaderManager_ = shaderManager; } @@ -127,6 +126,8 @@ public: } private: + void DecodeVerts(); + void DecodeVertsStep(); void DoFlush(); void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShader *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex); void ApplyDrawState(int prim); @@ -195,6 +196,7 @@ private: int vertexCountInDrawCalls; int decimationCounter_; + int 
decodeCounter_; UVScale *uvScale; };