From 50a2d1b87f6e29170067af074193229375d24f25 Mon Sep 17 00:00:00 2001 From: Henrik Rydgard Date: Sun, 28 Jul 2013 00:18:41 +0200 Subject: [PATCH] More work on prescaled UV --- GPU/GLES/DisplayListInterpreter.cpp | 20 ++++++++++++-------- GPU/GLES/ShaderManager.cpp | 16 ++++++++-------- GPU/GLES/TransformPipeline.cpp | 25 +++++++++++++++++++++---- GPU/GLES/TransformPipeline.h | 2 ++ GPU/GLES/VertexDecoder.cpp | 12 ++++++------ GPU/GLES/VertexShaderGenerator.cpp | 19 ++++++++++++------- GPU/GPUState.h | 9 +++++++-- GPU/Null/NullGpu.cpp | 16 ++++++++-------- 8 files changed, 76 insertions(+), 43 deletions(-) diff --git a/GPU/GLES/DisplayListInterpreter.cpp b/GPU/GLES/DisplayListInterpreter.cpp index fbd1d3c867..043186dde3 100644 --- a/GPU/GLES/DisplayListInterpreter.cpp +++ b/GPU/GLES/DisplayListInterpreter.cpp @@ -518,29 +518,33 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) { case GE_CMD_TEXSCALEU: if (diff) { - gstate_c.uScale = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); + gstate_c.uv.uScale = getFloat24(data); + if (!g_Config.bPrescaleUV) + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); } break; case GE_CMD_TEXSCALEV: if (diff) { - gstate_c.vScale = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); + gstate_c.uv.vScale = getFloat24(data); + if (!g_Config.bPrescaleUV) + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); } break; case GE_CMD_TEXOFFSETU: if (diff) { - gstate_c.uOff = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); + gstate_c.uv.uOff = getFloat24(data); + if (!g_Config.bPrescaleUV) + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); } break; case GE_CMD_TEXOFFSETV: if (diff) { - gstate_c.vOff = getFloat24(data); - shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); + gstate_c.uv.vOff = getFloat24(data); + if (!g_Config.bPrescaleUV) + shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET); } break; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index 
47a2a4f1aa..ec3819aa88 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -326,10 +326,10 @@ void LinkedShader::updateUniforms() { if (gstate.isModeThrough()) { // We never get here because we don't use HW transform with through mode. // Although - why don't we? - uvscaleoff[0] = gstate_c.uScale / gstate_c.curTextureWidth; - uvscaleoff[1] = gstate_c.vScale / gstate_c.curTextureHeight; - uvscaleoff[2] = gstate_c.uOff / gstate_c.curTextureWidth; - uvscaleoff[3] = gstate_c.vOff / gstate_c.curTextureHeight; + uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth; + uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight; + uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth; + uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight; glUniform4fv(u_uvscaleoffset, 1, uvscaleoff); } else { int w = 1 << (gstate.texsize[0] & 0xf); @@ -339,10 +339,10 @@ void LinkedShader::updateUniforms() { if ((gstate.texmapmode & 3) == 0) { static const float rescale[4] = {1.0f, 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; - uvscaleoff[0] = gstate_c.uScale * factor * widthFactor; - uvscaleoff[1] = gstate_c.vScale * factor * heightFactor; - uvscaleoff[2] = gstate_c.uOff * widthFactor; - uvscaleoff[3] = gstate_c.vOff * heightFactor; + uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor; + uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor; + uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; + uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } else { uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; diff --git a/GPU/GLES/TransformPipeline.cpp b/GPU/GLES/TransformPipeline.cpp index 93051632d8..1938c00954 100644 --- a/GPU/GLES/TransformPipeline.cpp +++ b/GPU/GLES/TransformPipeline.cpp @@ -68,7 +68,8 @@ TransformDrawEngine::TransformDrawEngine() shaderManager_(0), textureCache_(0), framebufferManager_(0), - numDrawCalls(0) { + numDrawCalls(0), 
+ uvScale(0) { // Allocate nicely aligned memory. Maybe graphics drivers will // appreciate it. // All this is a LOT of memory, need to see if we can cut down somehow. @@ -76,6 +77,9 @@ TransformDrawEngine::TransformDrawEngine() decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE); transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE); transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE); + if (g_Config.bPrescaleUV) { + uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS]; + } memset(vbo_, 0, sizeof(vbo_)); memset(ebo_, 0, sizeof(ebo_)); indexGen.Setup(decIndex); @@ -93,6 +97,7 @@ TransformDrawEngine::~TransformDrawEngine() { for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); iter++) { delete iter->second; } + delete [] uvScale; } void TransformDrawEngine::InitDeviceObjects() { @@ -662,8 +667,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw( { case 0: // UV mapping // Texture scale/offset is only performed in this mode. 
- uv[0] = uscale * (ruv[0]*gstate_c.uScale + gstate_c.uOff); - uv[1] = vscale * (ruv[1]*gstate_c.vScale + gstate_c.vOff); + uv[0] = uscale * (ruv[0]*gstate_c.uv.uScale + gstate_c.uv.uOff); + uv[1] = vscale * (ruv[1]*gstate_c.uv.vScale + gstate_c.uv.vOff); uv[2] = 1.0f; break; case 1: @@ -950,7 +955,7 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert gpuStats.numDrawCalls++; gpuStats.numVertsSubmitted += vertexCount; - DeferredDrawCall &dc = drawCalls[numDrawCalls++]; + DeferredDrawCall &dc = drawCalls[numDrawCalls]; dc.verts = verts; dc.inds = inds; dc.vertType = vertType; @@ -963,6 +968,11 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert dc.indexLowerBound = 0; dc.indexUpperBound = vertexCount - 1; } + + if (uvScale) { + uvScale[numDrawCalls] = gstate_c.uv; + } + numDrawCalls++; } void TransformDrawEngine::DecodeVerts() { @@ -976,6 +986,8 @@ void TransformDrawEngine::DecodeVerts() { void *inds = dc.inds; if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) { // Decode the verts and apply morphing. Simple. + if (uvScale) + gstate_c.uv = uvScale[i]; dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride, dc.verts, indexLowerBound, indexUpperBound); collectedVerts += indexUpperBound - indexLowerBound + 1; @@ -993,6 +1005,9 @@ void TransformDrawEngine::DecodeVerts() { while (j < numDrawCalls) { if (drawCalls[j].verts != dc.verts) break; + if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0) /* UV scale/offset of draw j differs from draw i: stop merging, the range is decoded with uvScale[i] */ + break; + indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound); indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound); lastMatch = j; @@ -1013,6 +1028,8 @@ void TransformDrawEngine::DecodeVerts() { int vertexCount = indexUpperBound - indexLowerBound + 1; // 3. Decode that range of vertex data. 
+ if (uvScale) + gstate_c.uv = uvScale[i]; dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride, dc.verts, indexLowerBound, indexUpperBound); collectedVerts += vertexCount; diff --git a/GPU/GLES/TransformPipeline.h b/GPU/GLES/TransformPipeline.h index 21ed0bd0ab..93cb5d3da0 100644 --- a/GPU/GLES/TransformPipeline.h +++ b/GPU/GLES/TransformPipeline.h @@ -179,6 +179,8 @@ private: enum { MAX_DEFERRED_DRAW_CALLS = 128 }; DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS]; int numDrawCalls; + + UVScale *uvScale; }; // Only used by SW transform diff --git a/GPU/GLES/VertexDecoder.cpp b/GPU/GLES/VertexDecoder.cpp index a06cbb22ac..b7b96daf48 100644 --- a/GPU/GLES/VertexDecoder.cpp +++ b/GPU/GLES/VertexDecoder.cpp @@ -211,22 +211,22 @@ void VertexDecoder::Step_TcFloatThrough() const void VertexDecoder::Step_TcU8Prescale() const { float *uv = (float *)(decoded_ + decFmt.uvoff); const u8 *uvdata = (const u8 *)(ptr_ + tcoff); - uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uScale + gstate_c.uOff; - uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.vScale + gstate_c.vOff; + uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uv.uScale + gstate_c.uv.uOff; + uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.uv.vScale + gstate_c.uv.vOff; } void VertexDecoder::Step_TcU16Prescale() const { float *uv = (float *)(decoded_ + decFmt.uvoff); const u16 *uvdata = (const u16 *)(ptr_ + tcoff); - uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uScale + gstate_c.uOff; - uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.vScale + gstate_c.vOff; + uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff; + uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff; } void VertexDecoder::Step_TcFloatPrescale() const { float *uv = (float *)(decoded_ + decFmt.uvoff); const float *uvdata = (const float*)(ptr_ + tcoff); - uv[0] = uvdata[0] * gstate_c.uScale + gstate_c.uOff; - uv[1] = uvdata[1] * 
gstate_c.vScale + gstate_c.vOff; + uv[0] = uvdata[0] * gstate_c.uv.uScale + gstate_c.uv.uOff; + uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff; } void VertexDecoder::Step_Color565() const diff --git a/GPU/GLES/VertexShaderGenerator.cpp b/GPU/GLES/VertexShaderGenerator.cpp index 9acfe632dc..1c83e3046e 100644 --- a/GPU/GLES/VertexShaderGenerator.cpp +++ b/GPU/GLES/VertexShaderGenerator.cpp @@ -22,11 +22,12 @@ #include #endif -#include "../ge_constants.h" -#include "../GPUState.h" -#include "../../Core/Config.h" +#include "base/stringutil.h" +#include "GPU/ge_constants.h" +#include "GPU/GPUState.h" +#include "Core/Config.h" -#include "VertexShaderGenerator.h" +#include "GPU/GLES/VertexShaderGenerator.h" // SDL 1.2 on Apple does not have support for OpenGL 3 and hence needs // special treatment in the shader generator. @@ -527,13 +528,17 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { case 1: // Projection mapping. { - const char *temp_tc; + std::string temp_tc; switch (gstate.getUVProjMode()) { case 0: // Use model space XYZ as source temp_tc = "vec4(a_position.xyz, 1.0)"; break; case 1: // Use unscaled UV as source - temp_tc = "vec4(a_texcoord.xy * 2.0, 0.0, 1.0)"; + { + static const char *rescaleuv[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f}; + const char *factor = rescaleuv[(vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT]; + temp_tc = StringFromFormat("vec4(a_texcoord.xy %s, 0.0, 1.0)", factor); + } break; case 2: // Use normalized transformed normal as source if (hasNormal) @@ -548,7 +553,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) { temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)"; break; } - WRITE(p, " v_texcoord = (u_texmtx * %s).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc); + WRITE(p, " v_texcoord = (u_texmtx * %s).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str()); } // Transform by texture matrix. 
XYZ as we are doing projection mapping. break; diff --git a/GPU/GPUState.h b/GPU/GPUState.h index 0d0ca24adb..daa822c1b6 100644 --- a/GPU/GPUState.h +++ b/GPU/GPUState.h @@ -330,6 +330,12 @@ enum SkipDrawReasonFlags { // The rest is cached simplified/converted data for fast access. // Does not need to be saved when saving/restoring context. + +struct UVScale { + float uScale, vScale; + float uOff, vOff; +}; + struct GPUStateCache { u32 vertexAddr; @@ -343,8 +349,7 @@ struct GPUStateCache int skipDrawReason; - float uScale,vScale; - float uOff,vOff; + UVScale uv; bool flipTexture; float zMin, zMax; diff --git a/GPU/Null/NullGpu.cpp b/GPU/Null/NullGpu.cpp index a3ab913564..39f5c869a3 100644 --- a/GPU/Null/NullGpu.cpp +++ b/GPU/Null/NullGpu.cpp @@ -181,23 +181,23 @@ void NullGPU::ExecuteOp(u32 op, u32 diff) break; case GE_CMD_TEXSCALEU: - gstate_c.uScale = getFloat24(data); - DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale); + gstate_c.uv.uScale = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uv.uScale); break; case GE_CMD_TEXSCALEV: - gstate_c.vScale = getFloat24(data); - DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale); + gstate_c.uv.vScale = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.uv.vScale); break; case GE_CMD_TEXOFFSETU: - gstate_c.uOff = getFloat24(data); - DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff); + gstate_c.uv.uOff = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uv.uOff); break; case GE_CMD_TEXOFFSETV: - gstate_c.vOff = getFloat24(data); - DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff); + gstate_c.uv.vOff = getFloat24(data); + DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.uv.vOff); break; case GE_CMD_SCISSOR1: