More work on prescaled UV

This commit is contained in:
Henrik Rydgard 2013-07-28 00:18:41 +02:00
parent b307d77b61
commit 50a2d1b87f
8 changed files with 76 additions and 43 deletions

View File

@ -518,29 +518,33 @@ void GLES_GPU::ExecuteOp(u32 op, u32 diff) {
case GE_CMD_TEXSCALEU:
if (diff) {
gstate_c.uScale = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
gstate_c.uv.uScale = getFloat24(data);
if (!g_Config.bPrescaleUV)
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
break;
case GE_CMD_TEXSCALEV:
if (diff) {
gstate_c.vScale = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
gstate_c.uv.vScale = getFloat24(data);
if (!g_Config.bPrescaleUV)
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
break;
case GE_CMD_TEXOFFSETU:
if (diff) {
gstate_c.uOff = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
gstate_c.uv.uOff = getFloat24(data);
if (!g_Config.bPrescaleUV)
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
break;
case GE_CMD_TEXOFFSETV:
if (diff) {
gstate_c.vOff = getFloat24(data);
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
gstate_c.uv.vOff = getFloat24(data);
if (!g_Config.bPrescaleUV)
shaderManager_->DirtyUniform(DIRTY_UVSCALEOFFSET);
}
break;

View File

@ -326,10 +326,10 @@ void LinkedShader::updateUniforms() {
if (gstate.isModeThrough()) {
// We never get here because we don't use HW transform with through mode.
// Although - why don't we?
uvscaleoff[0] = gstate_c.uScale / gstate_c.curTextureWidth;
uvscaleoff[1] = gstate_c.vScale / gstate_c.curTextureHeight;
uvscaleoff[2] = gstate_c.uOff / gstate_c.curTextureWidth;
uvscaleoff[3] = gstate_c.vOff / gstate_c.curTextureHeight;
uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth;
uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight;
uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth;
uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight;
glUniform4fv(u_uvscaleoffset, 1, uvscaleoff);
} else {
int w = 1 << (gstate.texsize[0] & 0xf);
@ -339,10 +339,10 @@ void LinkedShader::updateUniforms() {
if ((gstate.texmapmode & 3) == 0) {
static const float rescale[4] = {1.0f, 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
float factor = rescale[(gstate.vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT];
uvscaleoff[0] = gstate_c.uScale * factor * widthFactor;
uvscaleoff[1] = gstate_c.vScale * factor * heightFactor;
uvscaleoff[2] = gstate_c.uOff * widthFactor;
uvscaleoff[3] = gstate_c.vOff * heightFactor;
uvscaleoff[0] = gstate_c.uv.uScale * factor * widthFactor;
uvscaleoff[1] = gstate_c.uv.vScale * factor * heightFactor;
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
} else {
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;

View File

@ -68,7 +68,8 @@ TransformDrawEngine::TransformDrawEngine()
shaderManager_(0),
textureCache_(0),
framebufferManager_(0),
numDrawCalls(0) {
numDrawCalls(0),
uvScale(0) {
// Allocate nicely aligned memory. Maybe graphics drivers will
// appreciate it.
// All this is a LOT of memory, need to see if we can cut down somehow.
@ -76,6 +77,9 @@ TransformDrawEngine::TransformDrawEngine()
decIndex = (u16 *)AllocateMemoryPages(DECODED_INDEX_BUFFER_SIZE);
transformed = (TransformedVertex *)AllocateMemoryPages(TRANSFORMED_VERTEX_BUFFER_SIZE);
transformedExpanded = (TransformedVertex *)AllocateMemoryPages(3 * TRANSFORMED_VERTEX_BUFFER_SIZE);
if (g_Config.bPrescaleUV) {
uvScale = new UVScale[MAX_DEFERRED_DRAW_CALLS];
}
memset(vbo_, 0, sizeof(vbo_));
memset(ebo_, 0, sizeof(ebo_));
indexGen.Setup(decIndex);
@ -93,6 +97,7 @@ TransformDrawEngine::~TransformDrawEngine() {
for (auto iter = decoderMap_.begin(); iter != decoderMap_.end(); iter++) {
delete iter->second;
}
delete [] uvScale;
}
void TransformDrawEngine::InitDeviceObjects() {
@ -662,8 +667,8 @@ void TransformDrawEngine::SoftwareTransformAndDraw(
{
case 0: // UV mapping
// Texture scale/offset is only performed in this mode.
uv[0] = uscale * (ruv[0]*gstate_c.uScale + gstate_c.uOff);
uv[1] = vscale * (ruv[1]*gstate_c.vScale + gstate_c.vOff);
uv[0] = uscale * (ruv[0]*gstate_c.uv.uScale + gstate_c.uv.uOff);
uv[1] = vscale * (ruv[1]*gstate_c.uv.vScale + gstate_c.uv.vOff);
uv[2] = 1.0f;
break;
case 1:
@ -950,7 +955,7 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert
gpuStats.numDrawCalls++;
gpuStats.numVertsSubmitted += vertexCount;
DeferredDrawCall &dc = drawCalls[numDrawCalls++];
DeferredDrawCall &dc = drawCalls[numDrawCalls];
dc.verts = verts;
dc.inds = inds;
dc.vertType = vertType;
@ -963,6 +968,11 @@ void TransformDrawEngine::SubmitPrim(void *verts, void *inds, int prim, int vert
dc.indexLowerBound = 0;
dc.indexUpperBound = vertexCount - 1;
}
if (uvScale) {
uvScale[numDrawCalls] = gstate_c.uv;
}
numDrawCalls++;
}
void TransformDrawEngine::DecodeVerts() {
@ -976,6 +986,8 @@ void TransformDrawEngine::DecodeVerts() {
void *inds = dc.inds;
if (indexType == GE_VTYPE_IDX_NONE >> GE_VTYPE_IDX_SHIFT) {
// Decode the verts and apply morphing. Simple.
if (uvScale)
gstate_c.uv = uvScale[i];
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
collectedVerts += indexUpperBound - indexLowerBound + 1;
@ -993,6 +1005,9 @@ void TransformDrawEngine::DecodeVerts() {
while (j < numDrawCalls) {
if (drawCalls[j].verts != dc.verts)
break;
// Stop merging as soon as a later draw call was submitted with a different UV
// scale/offset snapshot: the merged range is decoded once with uvScale[i].
// The "!= 0" has to apply to memcmp's result. Inside the size argument it
// evaluates to 1, so only the first byte of the snapshot would be compared.
if (uvScale && memcmp(&uvScale[j], &uvScale[i], sizeof(uvScale[0])) != 0)
	break;
indexLowerBound = std::min(indexLowerBound, (int)drawCalls[j].indexLowerBound);
indexUpperBound = std::max(indexUpperBound, (int)drawCalls[j].indexUpperBound);
lastMatch = j;
@ -1013,6 +1028,8 @@ void TransformDrawEngine::DecodeVerts() {
int vertexCount = indexUpperBound - indexLowerBound + 1;
// 3. Decode that range of vertex data.
if (uvScale)
gstate_c.uv = uvScale[i];
dec_->DecodeVerts(decoded + collectedVerts * (int)dec_->GetDecVtxFmt().stride,
dc.verts, indexLowerBound, indexUpperBound);
collectedVerts += vertexCount;

View File

@ -179,6 +179,8 @@ private:
enum { MAX_DEFERRED_DRAW_CALLS = 128 };
DeferredDrawCall drawCalls[MAX_DEFERRED_DRAW_CALLS];
int numDrawCalls;
UVScale *uvScale;
};
// Only used by SW transform

View File

@ -211,22 +211,22 @@ void VertexDecoder::Step_TcFloatThrough() const
// Decodes a pair of 8-bit texture coordinates read at ptr_ + tcoff, normalizes
// each by 1/128, applies the texture scale and offset from gstate_c, and writes
// the two resulting floats at decoded_ + decFmt.uvoff.
// NOTE(review): the first uv[0]/uv[1] pair reads the flat gstate_c.uScale/uOff
// fields and is immediately overwritten by the second pair, which reads
// gstate_c.uv. This looks like the removed and added lines of a diff hunk shown
// together - confirm which pair belongs in the real file.
void VertexDecoder::Step_TcU8Prescale() const {
float *uv = (float *)(decoded_ + decFmt.uvoff);
const u8 *uvdata = (const u8 *)(ptr_ + tcoff);
uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uScale + gstate_c.uOff;
uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.vScale + gstate_c.vOff;
uv[0] = (float)uvdata[0] * (1.f / 128.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = (float)uvdata[1] * (1.f / 128.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
// Decodes a pair of 16-bit texture coordinates read at ptr_ + tcoff, normalizes
// each by 1/32768, applies the texture scale and offset from gstate_c, and
// writes the two resulting floats at decoded_ + decFmt.uvoff.
// NOTE(review): the first uv[0]/uv[1] pair reads the flat gstate_c.uScale/uOff
// fields and is immediately overwritten by the second pair, which reads
// gstate_c.uv. This looks like the removed and added lines of a diff hunk shown
// together - confirm which pair belongs in the real file.
void VertexDecoder::Step_TcU16Prescale() const {
float *uv = (float *)(decoded_ + decFmt.uvoff);
const u16 *uvdata = (const u16 *)(ptr_ + tcoff);
uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uScale + gstate_c.uOff;
uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.vScale + gstate_c.vOff;
uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
// Reads a pair of float texture coordinates at ptr_ + tcoff, applies the
// texture scale and offset from gstate_c (no normalization factor), and writes
// the two resulting floats at decoded_ + decFmt.uvoff.
// NOTE(review): the first uv[0]/uv[1] pair reads the flat gstate_c.uScale/uOff
// fields and is immediately overwritten by the second pair, which reads
// gstate_c.uv. This looks like the removed and added lines of a diff hunk shown
// together - confirm which pair belongs in the real file.
void VertexDecoder::Step_TcFloatPrescale() const {
float *uv = (float *)(decoded_ + decFmt.uvoff);
const float *uvdata = (const float*)(ptr_ + tcoff);
uv[0] = uvdata[0] * gstate_c.uScale + gstate_c.uOff;
uv[1] = uvdata[1] * gstate_c.vScale + gstate_c.vOff;
uv[0] = uvdata[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
}
void VertexDecoder::Step_Color565() const

View File

@ -22,11 +22,12 @@
#include <windows.h>
#endif
#include "../ge_constants.h"
#include "../GPUState.h"
#include "../../Core/Config.h"
#include "base/stringutil.h"
#include "GPU/ge_constants.h"
#include "GPU/GPUState.h"
#include "Core/Config.h"
#include "VertexShaderGenerator.h"
#include "GPU/GLES/VertexShaderGenerator.h"
// SDL 1.2 on Apple does not have support for OpenGL 3 and hence needs
// special treatment in the shader generator.
@ -527,13 +528,17 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
case 1: // Projection mapping.
{
const char *temp_tc;
std::string temp_tc;
switch (gstate.getUVProjMode()) {
case 0: // Use model space XYZ as source
temp_tc = "vec4(a_position.xyz, 1.0)";
break;
case 1: // Use unscaled UV as source
temp_tc = "vec4(a_texcoord.xy * 2.0, 0.0, 1.0)";
{
static const char *rescaleuv[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescaleuv[(vertType & GE_VTYPE_TC_MASK) >> GE_VTYPE_TC_SHIFT];
temp_tc = StringFromFormat("vec4(a_texcoord.xy %s, 0.0, 1.0)", factor);
}
break;
case 2: // Use normalized transformed normal as source
if (hasNormal)
@ -548,7 +553,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
temp_tc = "vec4(0.0, 0.0, 1.0, 1.0)";
break;
}
WRITE(p, " v_texcoord = (u_texmtx * %s).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc);
WRITE(p, " v_texcoord = (u_texmtx * %s).xyz * vec3(u_uvscaleoffset.xy, 1.0);\n", temp_tc.c_str());
}
// Transform by texture matrix. XYZ as we are doing projection mapping.
break;

View File

@ -330,6 +330,12 @@ enum SkipDrawReasonFlags {
// The rest is cached simplified/converted data for fast access.
// Does not need to be saved when saving/restoring context.
// Texture coordinate scale and offset, applied as uv * scale + offset.
// Plain aggregate of four floats: the draw engine snapshots one per deferred
// draw call and compares snapshots bytewise with memcmp, so keep the member
// order unchanged and the layout free of padding.
struct UVScale {
	float uScale;  // multiplier for the U coordinate
	float vScale;  // multiplier for the V coordinate
	float uOff;    // value added to U after scaling
	float vOff;    // value added to V after scaling
};
struct GPUStateCache
{
u32 vertexAddr;
@ -343,8 +349,7 @@ struct GPUStateCache
int skipDrawReason;
float uScale,vScale;
float uOff,vOff;
UVScale uv;
bool flipTexture;
float zMin, zMax;

View File

@ -181,23 +181,23 @@ void NullGPU::ExecuteOp(u32 op, u32 diff)
break;
case GE_CMD_TEXSCALEU:
gstate_c.uScale = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uScale);
gstate_c.uv.uScale = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture U Scale: %f", gstate_c.uv.uScale);
break;
case GE_CMD_TEXSCALEV:
gstate_c.vScale = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.vScale);
gstate_c.uv.vScale = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture V Scale: %f", gstate_c.uv.vScale);
break;
case GE_CMD_TEXOFFSETU:
gstate_c.uOff = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uOff);
gstate_c.uv.uOff = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture U Offset: %f", gstate_c.uv.uOff);
break;
case GE_CMD_TEXOFFSETV:
gstate_c.vOff = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.vOff);
gstate_c.uv.vOff = getFloat24(data);
DEBUG_LOG(G3D, "DL Texture V Offset: %f", gstate_c.uv.vOff);
break;
case GE_CMD_SCISSOR1: