From c051e9ec2cd5a2890d1b4697586e83b4d600c799 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 24 May 2020 14:05:54 -0700 Subject: [PATCH 1/2] GLES: Fix disabled vertex cache code. --- GPU/GLES/DrawEngineGLES.cpp | 25 +++++++++++++++---------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index 7d80f20fd4..df8b106f19 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -374,6 +374,9 @@ void DrawEngineGLES::DoFlush() { vai->maxIndex = indexGen.MaxIndex(); vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0; + size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; + u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vertexBufferOffset, &vertexBuffer); + memcpy(dest, decoded, size); goto rotateVBO; } @@ -394,8 +397,7 @@ void DrawEngineGLES::DoFlush() { } if (newMiniHash != vai->minihash || newHash != vai->hash) { MarkUnreliable(vai); - DecodeVerts(decoded); - goto rotateVBO; + goto skipVBO; } if (vai->numVerts > 64) { // exponential backoff up to 16 draws, then every 32 @@ -413,12 +415,11 @@ void DrawEngineGLES::DoFlush() { u32 newMiniHash = ComputeMiniHash(); if (newMiniHash != vai->minihash) { MarkUnreliable(vai); - DecodeVerts(decoded); - goto rotateVBO; + goto skipVBO; } } - if (vai->vbo == 0) { + if (vai->vbo == nullptr) { DecodeVerts(decoded); vai->numVerts = indexGen.VertexCount(); vai->prim = indexGen.Prim(); @@ -433,14 +434,18 @@ void DrawEngineGLES::DoFlush() { size_t vsz = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); vai->vbo = render_->CreateBuffer(GL_ARRAY_BUFFER, vsz, GL_STATIC_DRAW); - render_->BufferSubdata(vai->vbo, 0, vsz, decoded); + uint8_t *vertexData = new uint8_t[vsz]; + memcpy(vertexData, decoded, vsz); + render_->BufferSubdata(vai->vbo, 0, vsz, vertexData); // If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS, // there is no need for the index buffer we built. We can then use glDrawArrays instead // for a very minor speed boost. if (useElements) { size_t esz = sizeof(short) * indexGen.VertexCount(); vai->ebo = render_->CreateBuffer(GL_ELEMENT_ARRAY_BUFFER, esz, GL_STATIC_DRAW); - render_->BufferSubdata(vai->ebo, 0, esz, (uint8_t *)decIndex, false); + uint8_t *indexData = new uint8_t[esz]; + memcpy(indexData, decIndex, esz); + render_->BufferSubdata(vai->ebo, 0, esz, indexData); } else { vai->ebo = 0; render_->BindIndexBuffer(vai->ebo); @@ -482,13 +487,13 @@ void DrawEngineGLES::DoFlush() { if (vai->lastFrame != gpuStats.numFlips) { vai->numFrames++; } - DecodeVerts(decoded); - goto rotateVBO; + goto skipVBO; } } vai->lastFrame = gpuStats.numFlips; } else { +skipVBO: if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { // If software skinning, we've already predecoded into "decoded". So push that content. size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; @@ -530,8 +535,8 @@ rotateVBO: if (useElements) { if (!indexBuffer) { indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer); - render_->BindIndexBuffer(indexBuffer); } + render_->BindIndexBuffer(indexBuffer); render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset); } else { render_->Draw(glprim[prim], 0, vertexCount); From 56f4335915d78c048fce0090fffd7b354e24ecd3 Mon Sep 17 00:00:00 2001 From: "Unknown W. Brackets" Date: Sun, 24 May 2020 14:45:06 -0700 Subject: [PATCH 2/2] GLES: Delay buffer use in vertex cache. This creates the buffer on frame 2, starts using it on frame 3. --- GPU/GLES/DrawEngineGLES.cpp | 103 +++++++++++++++++------------------- GPU/GLES/DrawEngineGLES.h | 2 +- 2 files changed, 51 insertions(+), 54 deletions(-) diff --git a/GPU/GLES/DrawEngineGLES.cpp b/GPU/GLES/DrawEngineGLES.cpp index df8b106f19..6ab5620fe5 100644 --- a/GPU/GLES/DrawEngineGLES.cpp +++ b/GPU/GLES/DrawEngineGLES.cpp @@ -232,7 +232,7 @@ GLRInputLayout *DrawEngineGLES::SetupDecFmtForDraw(LinkedShader *program, const return inputLayout; } -void DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf) { +void *DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf) { u8 *dest = decoded; // Figure out how much pushbuffer space we need to allocate. @@ -241,6 +241,7 @@ void DrawEngineGLES::DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindO dest = (u8 *)push->Push(vertsToDecode * dec_->GetDecVtxFmt().stride, bindOffset, buf); } DecodeVerts(dest); + return dest; } void DrawEngineGLES::MarkUnreliable(VertexArrayInfo *vai) { @@ -341,6 +342,8 @@ void DrawEngineGLES::DoFlush() { if (vshader->UseHWTransform()) { int vertexCount = 0; bool useElements = true; + bool populateCache = false; + VertexArrayInfo *vai = nullptr; // Cannot cache vertex data with morph enabled. bool useCache = g_Config.bVertexCache && !(lastVType_ & GE_VTYPE_MORPHCOUNT_MASK); @@ -353,7 +356,7 @@ void DrawEngineGLES::DoFlush() { if (useCache) { u32 id = dcid_ ^ gstate.getUVGenMode(); // This can have an effect on which UV decoder we need to use! And hence what the decoded data will look like. See #9263 - VertexArrayInfo *vai = vai_.Get(id); + vai = vai_.Get(id); if (!vai) { vai = new VertexArrayInfo(); vai_.Insert(id, vai); @@ -368,16 +371,8 @@ void DrawEngineGLES::DoFlush() { vai->minihash = ComputeMiniHash(); vai->status = VertexArrayInfo::VAI_HASHING; vai->drawsUntilNextFullHash = 0; - DecodeVerts(decoded); // writes to indexGen - vai->numVerts = indexGen.VertexCount(); - vai->prim = indexGen.Prim(); - vai->maxIndex = indexGen.MaxIndex(); - vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0; - - size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; - u8 *dest = (u8 *)frameData.pushVertex->Push(size, &vertexBufferOffset, &vertexBuffer); - memcpy(dest, decoded, size); - goto rotateVBO; + useCache = false; + break; } // Hashing - still gaining confidence about the buffer. @@ -397,7 +392,8 @@ void DrawEngineGLES::DoFlush() { } if (newMiniHash != vai->minihash || newHash != vai->hash) { MarkUnreliable(vai); - goto skipVBO; + useCache = false; + break; } if (vai->numVerts > 64) { // exponential backoff up to 16 draws, then every 32 @@ -415,51 +411,27 @@ void DrawEngineGLES::DoFlush() { u32 newMiniHash = ComputeMiniHash(); if (newMiniHash != vai->minihash) { MarkUnreliable(vai); - goto skipVBO; + break; } } if (vai->vbo == nullptr) { - DecodeVerts(decoded); - vai->numVerts = indexGen.VertexCount(); - vai->prim = indexGen.Prim(); - vai->maxIndex = indexGen.MaxIndex(); - vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0; - useElements = !indexGen.SeenOnlyPurePrims(); - if (!useElements && indexGen.PureCount()) { - vai->numVerts = indexGen.PureCount(); - } - _dbg_assert_msg_(G3D, gstate_c.vertBounds.minV >= gstate_c.vertBounds.maxV, "Should not have checked UVs when caching."); - size_t vsz = dec_->GetDecVtxFmt().stride * indexGen.MaxIndex(); - vai->vbo = render_->CreateBuffer(GL_ARRAY_BUFFER, vsz, GL_STATIC_DRAW); - uint8_t *vertexData = new uint8_t[vsz]; - memcpy(vertexData, decoded, vsz); - render_->BufferSubdata(vai->vbo, 0, vsz, vertexData); - // If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS, - // there is no need for the index buffer we built. We can then use glDrawArrays instead - // for a very minor speed boost. - if (useElements) { - size_t esz = sizeof(short) * indexGen.VertexCount(); - vai->ebo = render_->CreateBuffer(GL_ELEMENT_ARRAY_BUFFER, esz, GL_STATIC_DRAW); - uint8_t *indexData = new uint8_t[esz]; - memcpy(indexData, decIndex, esz); - render_->BufferSubdata(vai->ebo, 0, esz, indexData); - } else { - vai->ebo = 0; - render_->BindIndexBuffer(vai->ebo); - } + // We'll populate the cache this time around, use it next time. + populateCache = true; + useCache = false; } else { gpuStats.numCachedDrawCalls++; useElements = vai->ebo ? true : false; gpuStats.numCachedVertsDrawn += vai->numVerts; gstate_c.vertexFullAlpha = vai->flags & VAI_FLAG_VERTEXFULLALPHA; + + vertexBuffer = vai->vbo; + indexBuffer = vai->ebo; + vertexCount = vai->numVerts; + prim = static_cast(vai->prim); } - vertexBuffer = vai->vbo; - indexBuffer = vai->ebo; - vertexCount = vai->numVerts; - prim = static_cast(vai->prim); break; } @@ -487,13 +459,17 @@ void DrawEngineGLES::DoFlush() { if (vai->lastFrame != gpuStats.numFlips) { vai->numFrames++; } - goto skipVBO; + useCache = false; + break; } } - vai->lastFrame = gpuStats.numFlips; - } else { -skipVBO: + if (useCache) { + vai->lastFrame = gpuStats.numFlips; + } + } + + if (!useCache) { if (g_Config.bSoftwareSkinning && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) { // If software skinning, we've already predecoded into "decoded". So push that content. size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; @@ -501,11 +477,25 @@ skipVBO: memcpy(dest, decoded, size); } else { // Decode directly into the pushbuffer - DecodeVertsToPushBuffer(frameData.pushVertex, &vertexBufferOffset, &vertexBuffer); + u8 *dest = (u8 *)DecodeVertsToPushBuffer(frameData.pushVertex, &vertexBufferOffset, &vertexBuffer); + if (populateCache) { + size_t size = decodedVerts_ * dec_->GetDecVtxFmt().stride; + vai->vbo = render_->CreateBuffer(GL_ARRAY_BUFFER, size, GL_STATIC_DRAW); + render_->BufferSubdata(vai->vbo, 0, size, dest, false); + } + } + + if (populateCache || (vai && vai->status == VertexArrayInfo::VAI_NEW)) { + vai->numVerts = indexGen.VertexCount(); + vai->prim = indexGen.Prim(); + vai->maxIndex = indexGen.MaxIndex(); + vai->flags = gstate_c.vertexFullAlpha ? VAI_FLAG_VERTEXFULLALPHA : 0; } -rotateVBO: gpuStats.numUncachedVertsDrawn += indexGen.VertexCount(); + // If there's only been one primitive type, and it's either TRIANGLES, LINES or POINTS, + // there is no need for the index buffer we built. We can then use glDrawArrays instead + // for a very minor speed boost. useElements = !indexGen.SeenOnlyPurePrims(); vertexCount = indexGen.VertexCount(); if (!useElements && indexGen.PureCount()) { @@ -534,7 +524,14 @@ rotateVBO: render_->BindVertexBuffer(inputLayout, vertexBuffer, vertexBufferOffset); if (useElements) { if (!indexBuffer) { - indexBufferOffset = (uint32_t)frameData.pushIndex->Push(decIndex, sizeof(uint16_t) * indexGen.VertexCount(), &indexBuffer); + size_t esz = sizeof(uint16_t) * indexGen.VertexCount(); + void *dest = frameData.pushIndex->Push(esz, &indexBufferOffset, &indexBuffer); + memcpy(dest, decIndex, esz); + + if (populateCache) { + vai->ebo = render_->CreateBuffer(GL_ELEMENT_ARRAY_BUFFER, esz, GL_STATIC_DRAW); + render_->BufferSubdata(vai->ebo, 0, esz, (uint8_t *)dest, false); + } } render_->BindIndexBuffer(indexBuffer); render_->DrawIndexed(glprim[prim], vertexCount, GL_UNSIGNED_SHORT, (GLvoid*)(intptr_t)indexBufferOffset); diff --git a/GPU/GLES/DrawEngineGLES.h b/GPU/GLES/DrawEngineGLES.h index 165329a3d7..70159d7e2e 100644 --- a/GPU/GLES/DrawEngineGLES.h +++ b/GPU/GLES/DrawEngineGLES.h @@ -198,7 +198,7 @@ private: GLRInputLayout *SetupDecFmtForDraw(LinkedShader *program, const DecVtxFormat &decFmt); - void DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf); + void *DecodeVertsToPushBuffer(GLPushBuffer *push, uint32_t *bindOffset, GLRBuffer **buf); void FreeVertexArray(VertexArrayInfo *vai);