Merge pull request #6313 from unknownbrackets/gpu-minor

Implement in-shader blending on gles2 / gl2
This commit is contained in:
Henrik Rydgård 2014-06-14 09:21:30 +02:00
commit 036cde7768
10 changed files with 35 additions and 25 deletions

View File

@ -283,10 +283,6 @@ bool ShouldUseShaderBlending() {
if (!gstate.isAlphaBlendEnabled()) { if (!gstate.isAlphaBlendEnabled()) {
return false; return false;
} }
// We can't blit on GLES2, so we don't support it. We also want texelFetch (OpenGL 3.0+ / GLES3+.)
if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) {
return false;
}
if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) { if (g_Config.iRenderingMode == FB_NON_BUFFERED_MODE) {
return false; return false;
} }
@ -516,6 +512,9 @@ void GenerateFragmentShader(char *buffer) {
WRITE(p, "uniform sampler2D tex;\n"); WRITE(p, "uniform sampler2D tex;\n");
if (!gstate.isModeClear() && ShouldUseShaderBlending()) { if (!gstate.isModeClear() && ShouldUseShaderBlending()) {
if (!gl_extensions.NV_shader_framebuffer_fetch) { if (!gl_extensions.NV_shader_framebuffer_fetch) {
if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) {
WRITE(p, "uniform vec2 u_fbotexSize;\n");
}
WRITE(p, "uniform sampler2D fbotex;\n"); WRITE(p, "uniform sampler2D fbotex;\n");
} }
if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) { if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) {
@ -766,6 +765,8 @@ void GenerateFragmentShader(char *buffer) {
// TODO: EXT_shader_framebuffer_fetch on iOS 6, possibly others. // TODO: EXT_shader_framebuffer_fetch on iOS 6, possibly others.
if (gl_extensions.NV_shader_framebuffer_fetch) { if (gl_extensions.NV_shader_framebuffer_fetch) {
WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n"); WRITE(p, " lowp vec4 destColor = gl_LastFragData[0];\n");
} else if (!gl_extensions.VersionGEThan(3, 0, 0) && !gl_extensions.GLES3) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", texture);
} else { } else {
WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n"); WRITE(p, " lowp vec4 destColor = texelFetch(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n");
} }

View File

@ -1053,6 +1053,8 @@ void FramebufferManager::DoSetRenderFrameBuffer() {
gstate_c.curRTWidth = vfb->width; gstate_c.curRTWidth = vfb->width;
gstate_c.curRTHeight = vfb->height; gstate_c.curRTHeight = vfb->height;
} }
gstate_c.curRTRenderWidth = vfb->renderWidth;
gstate_c.curRTRenderHeight = vfb->renderHeight;
} }
void FramebufferManager::SetLineWidth() { void FramebufferManager::SetLineWidth() {

View File

@ -162,8 +162,8 @@ static const CommandTableEntry commandTable[] = {
{GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixA}, {GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, &GLES_GPU::Execute_BlendFixB}, {GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE},
{GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE}, {GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE},
@ -1096,14 +1096,6 @@ void GLES_GPU::Execute_ColorRef(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF); shaderManager_->DirtyUniform(DIRTY_ALPHACOLORREF);
} }
void GLES_GPU::Execute_BlendFixA(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BLENDFIX);
}
void GLES_GPU::Execute_BlendFixB(u32 op, u32 diff) {
shaderManager_->DirtyUniform(DIRTY_BLENDFIX);
}
void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) { void GLES_GPU::Execute_WorldMtxNum(u32 op, u32 diff) {
// This is almost always followed by GE_CMD_WORLDMATRIXDATA. // This is almost always followed by GE_CMD_WORLDMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4); const u32_le *src = (const u32_le *)Memory::GetPointer(currentList->pc + 4);
@ -1634,11 +1626,7 @@ void GLES_GPU::ExecuteOpInternal(u32 op, u32 diff) {
break; break;
case GE_CMD_BLENDFIXEDA: case GE_CMD_BLENDFIXEDA:
Execute_BlendFixA(op, diff);
break;
case GE_CMD_BLENDFIXEDB: case GE_CMD_BLENDFIXEDB:
Execute_BlendFixB(op, diff);
break; break;
case GE_CMD_ALPHATESTENABLE: case GE_CMD_ALPHATESTENABLE:

View File

@ -128,8 +128,6 @@ public:
void Execute_AlphaTest(u32 op, u32 diff); void Execute_AlphaTest(u32 op, u32 diff);
void Execute_StencilTest(u32 op, u32 diff); void Execute_StencilTest(u32 op, u32 diff);
void Execute_ColorRef(u32 op, u32 diff); void Execute_ColorRef(u32 op, u32 diff);
void Execute_BlendFixA(u32 op, u32 diff);
void Execute_BlendFixB(u32 op, u32 diff);
void Execute_WorldMtxNum(u32 op, u32 diff); void Execute_WorldMtxNum(u32 op, u32 diff);
void Execute_WorldMtxData(u32 op, u32 diff); void Execute_WorldMtxData(u32 op, u32 diff);
void Execute_ViewMtxNum(u32 op, u32 diff); void Execute_ViewMtxNum(u32 op, u32 diff);

View File

@ -151,6 +151,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans
u_fbotex = glGetUniformLocation(program, "fbotex"); u_fbotex = glGetUniformLocation(program, "fbotex");
u_blendFixA = glGetUniformLocation(program, "u_blendFixA"); u_blendFixA = glGetUniformLocation(program, "u_blendFixA");
u_blendFixB = glGetUniformLocation(program, "u_blendFixB"); u_blendFixB = glGetUniformLocation(program, "u_blendFixB");
u_fbotexSize = glGetUniformLocation(program, "u_fbotexSize");
// Transform // Transform
u_view = glGetUniformLocation(program, "u_view"); u_view = glGetUniformLocation(program, "u_view");
@ -225,7 +226,7 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, u32 vertType, bool useHWTrans
if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX; if (u_view != -1) availableUniforms |= DIRTY_VIEWMATRIX;
if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX; if (u_texmtx != -1) availableUniforms |= DIRTY_TEXMATRIX;
if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE; if (u_stencilReplaceValue != -1) availableUniforms |= DIRTY_STENCILREPLACEVALUE;
if (u_blendFixA != -1 || u_blendFixB != -1) availableUniforms |= DIRTY_BLENDFIX; if (u_blendFixA != -1 || u_blendFixB != -1 || u_fbotexSize != -1) availableUniforms |= DIRTY_SHADERBLEND;
// Looping up to numBones lets us avoid checking u_bone[i] // Looping up to numBones lets us avoid checking u_bone[i]
for (int i = 0; i < numBones; i++) { for (int i = 0; i < numBones; i++) {
@ -551,9 +552,17 @@ void LinkedShader::UpdateUniforms(u32 vertType) {
} }
#endif #endif
if (dirty & DIRTY_BLENDFIX) { if (dirty & DIRTY_SHADERBLEND) {
SetColorUniform3(u_blendFixA, gstate.getFixA()); SetColorUniform3(u_blendFixA, gstate.getFixA());
SetColorUniform3(u_blendFixB, gstate.getFixB()); SetColorUniform3(u_blendFixB, gstate.getFixB());
const float fbotexSize[2] = {
1.0f / (float)gstate_c.curRTRenderWidth,
1.0f / (float)gstate_c.curRTRenderHeight,
};
if (u_fbotexSize != -1) {
glUniform2fv(u_fbotexSize, 1, fbotexSize);
}
} }
// Lighting // Lighting

View File

@ -77,6 +77,7 @@ public:
int u_fbotex; int u_fbotex;
int u_blendFixA; int u_blendFixA;
int u_blendFixB; int u_blendFixB;
int u_fbotexSize;
// Fragment processing inputs // Fragment processing inputs
int u_alphacolorref; int u_alphacolorref;
@ -130,7 +131,7 @@ enum
DIRTY_AMBIENT = (1 << 15), DIRTY_AMBIENT = (1 << 15),
DIRTY_MATAMBIENTALPHA = (1 << 16), DIRTY_MATAMBIENTALPHA = (1 << 16),
DIRTY_BLENDFIX = (1 << 17), // (either one.) DIRTY_SHADERBLEND = (1 << 17), // Used only for in-shader blending.
DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares" DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares"
DIRTY_TEXCLAMP = (1 << 19), DIRTY_TEXCLAMP = (1 << 19),

View File

@ -195,8 +195,13 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
glActiveTexture(GL_TEXTURE1); glActiveTexture(GL_TEXTURE1);
framebufferManager_->BindFramebufferColor(NULL); framebufferManager_->BindFramebufferColor(NULL);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
fboTexBound_ = true; fboTexBound_ = true;
shaderManager_->DirtyUniform(DIRTY_SHADERBLEND);
} }
// None of the below logic is interesting, we're gonna do it entirely in the shader. // None of the below logic is interesting, we're gonna do it entirely in the shader.
wantBlend = false; wantBlend = false;
@ -204,6 +209,7 @@ void TransformDrawEngine::ApplyDrawState(int prim) {
glActiveTexture(GL_TEXTURE1); glActiveTexture(GL_TEXTURE1);
glBindTexture(GL_TEXTURE_2D, 0); glBindTexture(GL_TEXTURE_2D, 0);
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
fboTexBound_ = false;
} }
glstate.blend.set(wantBlend); glstate.blend.set(wantBlend);

View File

@ -969,8 +969,8 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffe
glActiveTexture(GL_TEXTURE0); glActiveTexture(GL_TEXTURE0);
framebufferManager_->BindFramebufferColor(framebuffer, true); framebufferManager_->BindFramebufferColor(framebuffer, true);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
glTexParameterf(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
entry->status |= TexCacheEntry::STATUS_TEXPARAM_DIRTY; entry->status |= TexCacheEntry::STATUS_TEXPARAM_DIRTY;
glDisable(GL_BLEND); glDisable(GL_BLEND);

View File

@ -335,4 +335,6 @@ void GPUStateCache::DoState(PointerWrap &p) {
p.Do(curRTWidth); p.Do(curRTWidth);
p.Do(curRTHeight); p.Do(curRTHeight);
// curRTRenderWidth and curRTRenderHeight don't need to be saved.
} }

View File

@ -475,6 +475,9 @@ struct GPUStateCache
u32 curRTWidth; u32 curRTWidth;
u32 curRTHeight; u32 curRTHeight;
u32 curRTRenderWidth;
u32 curRTRenderHeight;
u32 getRelativeAddress(u32 data) const; u32 getRelativeAddress(u32 data) const;
void DoState(PointerWrap &p); void DoState(PointerWrap &p);
}; };