diff --git a/GPU/Directx9/FramebufferDX9.cpp b/GPU/Directx9/FramebufferDX9.cpp index 96e8708dc9..d0ccd0c3cb 100644 --- a/GPU/Directx9/FramebufferDX9.cpp +++ b/GPU/Directx9/FramebufferDX9.cpp @@ -23,6 +23,7 @@ #include "Core/Reporting.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" +#include "GPU/Debugger/Stepping.h" #include "helper/dx_state.h" #include "helper/fbo.h" @@ -642,6 +643,40 @@ namespace DX9 { return offscreen; } + void FramebufferManagerDX9::BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy) { + if (framebuffer == NULL) { + framebuffer = currentRenderVfb_; + } + + if (!framebuffer->fbo || !useBufferedRendering_) { + pD3Ddevice->SetTexture(stage, nullptr); + gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; + return; + } + + // currentRenderVfb_ will always be set when this is called, except from the GE debugger. + // Let's just not bother with the copy in that case. + if (GPUStepping::IsStepping() || g_Config.bDisableSlowFramebufEffects) { + skipCopy = true; + } + if (!skipCopy && currentRenderVfb_ && framebuffer->fb_address == gstate.getFrameBufRawAddress()) { + // TODO: Maybe merge with bvfbs_? Not sure if those could be packing, and they're created at a different size. 
+ FBO *renderCopy = GetTempFBO(framebuffer->renderWidth, framebuffer->renderHeight, (FBOColorDepth)framebuffer->colorDepth); + if (renderCopy) { + VirtualFramebuffer copyInfo = *framebuffer; + copyInfo.fbo = renderCopy; + BlitFramebuffer(&copyInfo, 0, 0, framebuffer, 0, 0, framebuffer->drawnWidth, framebuffer->drawnHeight, 0, false); + + RebindFramebuffer(); + pD3Ddevice->SetTexture(stage, fbo_get_color_texture(renderCopy)); + } else { + pD3Ddevice->SetTexture(stage, fbo_get_color_texture(framebuffer->fbo)); + } + } else { + pD3Ddevice->SetTexture(stage, fbo_get_color_texture(framebuffer->fbo)); + } + } + void FramebufferManagerDX9::CopyDisplayToOutput() { fbo_unbind(); diff --git a/GPU/Directx9/FramebufferDX9.h b/GPU/Directx9/FramebufferDX9.h index 69be6d3211..081e5bbb1e 100644 --- a/GPU/Directx9/FramebufferDX9.h +++ b/GPU/Directx9/FramebufferDX9.h @@ -73,6 +73,8 @@ public: void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst); + void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy); + virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; std::vector GetFramebufferList(); diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.cpp b/GPU/Directx9/PixelShaderGeneratorDX9.cpp index 4cdbb4abc8..9b25ea5d82 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.cpp +++ b/GPU/Directx9/PixelShaderGeneratorDX9.cpp @@ -19,6 +19,7 @@ #include "Core/Reporting.h" #include "Core/Config.h" +#include "GPU/Directx9/helper/global.h" #include "GPU/Directx9/PixelShaderGeneratorDX9.h" #include "GPU/ge_constants.h" #include "GPU/GPUState.h" @@ -106,6 +107,116 @@ bool IsAlphaTestAgainstZero() { return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF; } +const bool nonAlphaSrcFactors[16] = { + true, // GE_SRCBLEND_DSTCOLOR, + true, // GE_SRCBLEND_INVDSTCOLOR, + false, // GE_SRCBLEND_SRCALPHA, + false, // GE_SRCBLEND_INVSRCALPHA, + true, // GE_SRCBLEND_DSTALPHA, + true, // 
GE_SRCBLEND_INVDSTALPHA, + false, // GE_SRCBLEND_DOUBLESRCALPHA, + false, // GE_SRCBLEND_DOUBLEINVSRCALPHA, + true, // GE_SRCBLEND_DOUBLEDSTALPHA, + true, // GE_SRCBLEND_DOUBLEINVDSTALPHA, + true, // GE_SRCBLEND_FIXA, +}; + +const bool nonAlphaDestFactors[16] = { + true, // GE_DSTBLEND_SRCCOLOR, + true, // GE_DSTBLEND_INVSRCCOLOR, + false, // GE_DSTBLEND_SRCALPHA, + false, // GE_DSTBLEND_INVSRCALPHA, + true, // GE_DSTBLEND_DSTALPHA, + true, // GE_DSTBLEND_INVDSTALPHA, + false, // GE_DSTBLEND_DOUBLESRCALPHA, + false, // GE_DSTBLEND_DOUBLEINVSRCALPHA, + true, // GE_DSTBLEND_DOUBLEDSTALPHA, + true, // GE_DSTBLEND_DOUBLEINVDSTALPHA, + true, // GE_DSTBLEND_FIXB, +}; + +ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) { + if (!gstate.isStencilTestEnabled() || gstate.isModeClear()) { + return REPLACE_ALPHA_NO; + } + + if (replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_COPY_FBO) { + if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) { + return REPLACE_ALPHA_YES; + } else { + // TODO +#if 0 + if (pD3DdeviceEx) { + return REPLACE_ALPHA_DUALSOURCE; + } else { +#else + { +#endif + return REPLACE_ALPHA_NO; + } + } + } + + return REPLACE_ALPHA_YES; +} + +StencilValueType ReplaceAlphaWithStencilType() { + switch (gstate.FrameBufFormat()) { + case GE_FORMAT_565: + // There's never a stencil value. Maybe the right alpha is 1? + return STENCIL_VALUE_ONE; + + case GE_FORMAT_5551: + switch (gstate.getStencilOpZPass()) { + // Technically, this should only ever use zero/one. + case GE_STENCILOP_REPLACE: + return (gstate.getStencilTestRef() & 0x80) != 0 ? STENCIL_VALUE_ONE : STENCIL_VALUE_ZERO; + + // Decrementing always zeros, since there's only one bit. + case GE_STENCILOP_DECR: + case GE_STENCILOP_ZERO: + return STENCIL_VALUE_ZERO; + + // Incrementing always fills, since there's only one bit. 
+ case GE_STENCILOP_INCR: + return STENCIL_VALUE_ONE; + + case GE_STENCILOP_INVERT: + return STENCIL_VALUE_INVERT; + + case GE_STENCILOP_KEEP: + return STENCIL_VALUE_KEEP; + } + break; + + case GE_FORMAT_4444: + case GE_FORMAT_8888: + case GE_FORMAT_INVALID: + switch (gstate.getStencilOpZPass()) { + case GE_STENCILOP_REPLACE: + return STENCIL_VALUE_UNIFORM; + + case GE_STENCILOP_ZERO: + return STENCIL_VALUE_ZERO; + + case GE_STENCILOP_DECR: + return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8; + + case GE_STENCILOP_INCR: + return gstate.FrameBufFormat() == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8; + + case GE_STENCILOP_INVERT: + return STENCIL_VALUE_INVERT; + + case GE_STENCILOP_KEEP: + return STENCIL_VALUE_KEEP; + } + break; + } + + return STENCIL_VALUE_KEEP; +} + bool IsColorTestTriviallyTrue() { switch (gstate.getColorTestFunction()) { case GE_COMP_NEVER: @@ -122,6 +233,122 @@ bool IsColorTestTriviallyTrue() { } } +ReplaceBlendType ReplaceBlendWithShader() { + if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) { + return REPLACE_BLEND_NO; + } + + GEBlendSrcFactor funcA = gstate.getBlendFuncA(); + GEBlendDstFactor funcB = gstate.getBlendFuncB(); + GEBlendMode eq = gstate.getBlendEq(); + + // Let's get the non-factor modes out of the way first. + switch (eq) { + case GE_BLENDMODE_ABSDIFF: + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO; + + case GE_BLENDMODE_MIN: + case GE_BLENDMODE_MAX: + return REPLACE_BLEND_STANDARD; + + default: + break; + } + + switch (funcA) { + case GE_SRCBLEND_DOUBLESRCALPHA: + case GE_SRCBLEND_DOUBLEINVSRCALPHA: + // 2x alpha in the source function and not in the dest = source color doubling. + // Even dest alpha is safe, since we're moving the * 2.0 into the src color. + switch (funcB) { + case GE_DSTBLEND_SRCCOLOR: + case GE_DSTBLEND_INVSRCCOLOR: + // Can't double, we need the source color to be correct. 
+ return !gstate_c.allowShaderBlend ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO; + + case GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO; + + case GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + // We can't technically do this correctly (due to clamping) without reading the dst color. + // Using a copy isn't accurate either, though, when there's overlap. + return REPLACE_BLEND_PRE_SRC_2X_ALPHA; + + default: + // TODO: Could use vertexFullAlpha, but it's not calculated yet. + return REPLACE_BLEND_PRE_SRC; + } + + case GE_SRCBLEND_DOUBLEDSTALPHA: + case GE_SRCBLEND_DOUBLEINVDSTALPHA: + switch (funcB) { + case GE_DSTBLEND_SRCCOLOR: + case GE_DSTBLEND_INVSRCCOLOR: + // Can't double, we need the source color to be correct. + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO; + + case GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + case GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_COPY_FBO; + + default: + // We can't technically do this correctly (due to clamping) without reading the dst alpha. + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_COPY_FBO; + } + + case GE_SRCBLEND_FIXA: + switch (funcB) { + case GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + // Can't safely double alpha, will clamp. + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO; + + case GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO; + + case GE_DSTBLEND_FIXB: + if (gstate.getFixA() == 0xFFFFFF && gstate.getFixB() == 0x000000) { + // Some games specify this. Some cards may prefer blending off entirely. 
+ return REPLACE_BLEND_NO; + } else if (gstate.getFixA() == 0xFFFFFF || gstate.getFixA() == 0x000000 || gstate.getFixB() == 0xFFFFFF || gstate.getFixB() == 0x000000) { + return REPLACE_BLEND_STANDARD; + } else { + return REPLACE_BLEND_PRE_SRC; + } + + default: + return REPLACE_BLEND_STANDARD; + } + + default: + switch (funcB) { + case GE_DSTBLEND_DOUBLESRCALPHA: + case GE_DSTBLEND_DOUBLEINVSRCALPHA: + if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) { + // Can't safely double alpha, will clamp. However, a copy may easily be worse due to overlap. + return REPLACE_BLEND_PRE_SRC_2X_ALPHA; + } else { + // This means dst alpha/color is used in the src factor. + // Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?) + // We will just hope that doubling alpha for the dst factor will not clamp too badly. + return REPLACE_BLEND_2X_ALPHA; + } + + case GE_DSTBLEND_DOUBLEDSTALPHA: + case GE_DSTBLEND_DOUBLEINVDSTALPHA: + return !gstate_c.allowShaderBlend ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO; + + default: + return REPLACE_BLEND_STANDARD; + } + } +} + static bool CanDoubleSrcBlendMode() { if (!gstate.isAlphaBlendEnabled()) { return false; @@ -164,10 +391,10 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) { bool alphaTestAgainstZero = IsAlphaTestAgainstZero(); bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue(); bool enableColorDoubling = gstate.isColorDoublingEnabled(); - // This isn't really correct, but it's a hack to get doubled blend modes to work more correctly. - bool enableAlphaDoubling = CanDoubleSrcBlendMode(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(); + ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(replaceBlend); // All texfuncs except replace are the same for RGB as for RGBA with full alpha. 
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) @@ -206,14 +433,31 @@ void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id) { id0 |= (enableFog & 1) << 19; id0 |= (doTextureProjection & 1) << 20; id0 |= (enableColorDoubling & 1) << 21; - id0 |= (enableAlphaDoubling & 1) << 22; - id0 |= (gstate_c.bgraTexture & 1) << 23; + // 2 bits + id0 |= (stencilToAlpha) << 22; + + if (stencilToAlpha != REPLACE_ALPHA_NO) { + // 4 bits + id0 |= ReplaceAlphaWithStencilType() << 24; + } id0 |= (alphaTestAgainstZero & 1) << 28; if (enableAlphaTest) gpuStats.numAlphaTestedDraws++; else gpuStats.numNonAlphaTestedDraws++; + + id0 |= (gstate_c.bgraTexture & 1) << 29; + // 30 and 31 are free. + + // 3 bits. + id1 |= replaceBlend << 0; + if (replaceBlend > REPLACE_BLEND_STANDARD) { + // 11 bits total. + id1 |= gstate.getBlendEq() << 3; + id1 |= gstate.getBlendFuncA() << 6; + id1 |= gstate.getBlendFuncB() << 10; + } } id->d[0] = id0; @@ -231,23 +475,44 @@ void GenerateFragmentShaderDX9(char *buffer) { bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue() && !gstate.isModeClear() && !g_Config.bDisableAlphaTest; bool alphaTestAgainstZero = IsAlphaTestAgainstZero(); bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue() && !gstate.isModeClear(); - bool enableColorDoubling = gstate.isColorDoublingEnabled(); - // This isn't really correct, but it's a hack to get doubled blend modes to work more correctly. 
- bool enableAlphaDoubling = CanDoubleSrcBlendMode(); + bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled(); bool doTextureProjection = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX; bool doTextureAlpha = gstate.isTextureAlphaUsed(); + bool textureAtOffset = gstate_c.curTextureXOffset != 0 || gstate_c.curTextureYOffset != 0; + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(); + ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(replaceBlend); if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) doTextureAlpha = false; if (doTexture) - WRITE(p, "sampler tex: register(s0);\n"); + WRITE(p, "sampler tex : register(s0);\n"); + if (!gstate.isModeClear() && replaceBlend > REPLACE_BLEND_STANDARD) { + if (replaceBlend == REPLACE_BLEND_COPY_FBO) { + WRITE(p, "float2 u_fbotexSize : register(c%i);\n", CONST_PS_FBOTEXSIZE); + WRITE(p, "sampler fbotex : register(s1);\n"); + } + if (gstate.getBlendFuncA() == GE_SRCBLEND_FIXA) { + WRITE(p, "float3 u_blendFixA : register(c%i);\n", CONST_PS_BLENDFIXA); + } + if (gstate.getBlendFuncB() == GE_DSTBLEND_FIXB) { + WRITE(p, "float3 u_blendFixB : register(c%i);\n", CONST_PS_BLENDFIXB); + } + } + if (gstate_c.needShaderTexClamp && doTexture) { + WRITE(p, "float4 u_texclamp : register(c%i);\n", CONST_PS_TEXCLAMP); + if (textureAtOffset) { + WRITE(p, "float2 u_texclampoff : register(c%i);\n", CONST_PS_TEXCLAMPOFF); + } + } if (enableAlphaTest || enableColorTest) { WRITE(p, "float4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF); WRITE(p, "float4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK); } - + if (stencilToAlpha && ReplaceAlphaWithStencilType() == STENCIL_VALUE_UNIFORM) { + WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE); + } if (gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_BLEND) { WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV); } @@ -282,7 
+547,7 @@ void GenerateFragmentShaderDX9(char *buffer) { if (gstate.isModeClear()) { // Clear mode does not allow any fancy shading. - WRITE(p, " return In.v_color0;\n"); + WRITE(p, " float4 v = In.v_color0;\n"); } else { const char *secondary = ""; // Secondary color for specular on top of texture @@ -295,12 +560,51 @@ void GenerateFragmentShaderDX9(char *buffer) { if (gstate.isTextureMapEnabled()) { const char *texcoord = "In.v_texcoord"; - if (doTextureProjection && gstate_c.flipTexture) { + // TODO: Not sure the right way to do this for projection. + if (gstate_c.needShaderTexClamp) { + // We may be clamping inside a larger surface (tex = 64x64, buffer=480x272). + // We may also be wrapping in such a surface, or either one in a too-small surface. + // Obviously, clamping to a smaller surface won't work. But better to clamp to something. + std::string ucoord = "In.v_texcoord.x"; + std::string vcoord = "In.v_texcoord.y"; + if (doTextureProjection) { + ucoord += " / In.v_texcoord.z"; + vcoord = "(In.v_texcoord.y / In.v_texcoord.z)"; + // Vertex texcoords are NOT flipped when projecting despite gstate_c.flipTexture. + } else if (gstate_c.flipTexture) { + vcoord = "1.0 - " + vcoord; + } + + if (gstate.isTexCoordClampedS()) { + ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)"; + } else { + ucoord = "fmod(" + ucoord + ", u_texclamp.x)"; + } + if (gstate.isTexCoordClampedT()) { + vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)"; + } else { + vcoord = "fmod(" + vcoord + ", u_texclamp.y)"; + } + if (textureAtOffset) { + ucoord = "(" + ucoord + " + u_texclampoff.x)"; + vcoord = "(" + vcoord + " + u_texclampoff.y)"; + } + + if (gstate_c.flipTexture) { + vcoord = "1.0 - " + vcoord; + } + + WRITE(p, " float2 fixedcoord = float2(%s, %s);\n", ucoord.c_str(), vcoord.c_str()); + texcoord = "fixedcoord"; + // We already projected it. 
+ doTextureProjection = false; + } else if (doTextureProjection && gstate_c.flipTexture) { // Since we need to flip v, we project manually. WRITE(p, " float2 fixedcoord = float2(v_texcoord.x / v_texcoord.z, 1.0 - (v_texcoord.y / v_texcoord.z));\n"); texcoord = "fixedcoord"; doTextureProjection = false; } + if (doTextureProjection) { WRITE(p, " float4 t = tex2Dproj(tex, float4(In.v_texcoord.x, In.v_texcoord.y, 0, In.v_texcoord.z))%s;\n", gstate_c.bgraTexture ? ".bgra" : ""); } else { @@ -319,6 +623,9 @@ void GenerateFragmentShaderDX9(char *buffer) { case GE_TEXFUNC_REPLACE: WRITE(p, " float4 v = t%s;\n", secondary); break; case GE_TEXFUNC_ADD: + case GE_TEXFUNC_UNKNOWN1: + case GE_TEXFUNC_UNKNOWN2: + case GE_TEXFUNC_UNKNOWN3: WRITE(p, " float4 v = float4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary); break; default: WRITE(p, " float4 v = p;\n"); break; @@ -335,6 +642,9 @@ void GenerateFragmentShaderDX9(char *buffer) { case GE_TEXFUNC_REPLACE: WRITE(p, " float4 v = float4(t.rgb, p.a)%s;\n", secondary); break; case GE_TEXFUNC_ADD: + case GE_TEXFUNC_UNKNOWN1: + case GE_TEXFUNC_UNKNOWN2: + case GE_TEXFUNC_UNKNOWN3: WRITE(p, " float4 v = float4(p.rgb + t.rgb, p.a)%s;\n", secondary); break; default: WRITE(p, " float4 v = p;\n"); break; @@ -374,33 +684,113 @@ void GenerateFragmentShaderDX9(char *buffer) { } } #endif - // TODO: Before or after the color test? 
- if (enableColorDoubling && enableAlphaDoubling) { - WRITE(p, " v = v * 2.0;\n"); - } else if (enableColorDoubling) { - WRITE(p, " v.rgb = v.rgb * 2.0;\n"); - } else if (enableAlphaDoubling) { - WRITE(p, " v.a = v.a * 2.0;\n"); - } - if (enableColorTest) { GEComparison colorTestFunc = gstate.getColorTestFunction(); const char *colorTestFuncs[] = { "#", "#", " != ", " == " }; // never/always don't make sense u32 colorTestMask = gstate.getColorTestMask(); if (colorTestFuncs[colorTestFunc][0] != '#') { const char * test = colorTestFuncs[colorTestFunc]; - WRITE(p, "float3 colortest = roundAndScaleTo255v(v.rgb);\n"); - WRITE(p, "if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test); + WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n"); + WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b )) clip(-1);\n", test, test, test); + } else { + WRITE(p, " clip(-1);\n"); } } + // Color doubling happens after the color test. 
+ if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 4.0;\n"); + } else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) { + WRITE(p, " v.rgb = v.rgb * 2.0;\n"); + } + if (enableFog) { WRITE(p, " float fogCoef = clamp(In.v_fogdepth.x, 0.0, 1.0);\n"); - WRITE(p, " return lerp(float4(u_fogcolor, v.a), v, fogCoef);\n"); - } else { - WRITE(p, " return v;\n"); + WRITE(p, " v = lerp(float4(u_fogcolor, v.a), v, fogCoef);\n"); + } + + if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { + GEBlendSrcFactor funcA = gstate.getBlendFuncA(); + const char *srcFactor = "ERROR"; + switch (funcA) { + case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break; + case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break; + case GE_SRCBLEND_SRCALPHA: srcFactor = "float3(v.a, v.a, v.a)"; break; + case GE_SRCBLEND_INVSRCALPHA: srcFactor = "float3(1.0 - v.a, 1.0 - v.a, 1.0 - v.a)"; break; + case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break; + case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break; + case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "float3(v.a * 2.0, v.a * 2.0, v.a * 2.0)"; break; + // TODO: Double inverse, or inverse double? Following softgpu for now... + case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "float3(1.0 - v.a * 2.0, 1.0 - v.a * 2.0, 1.0 - v.a * 2.0)"; break; + case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "ERROR"; break; + case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break; + case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break; + } + + WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor); + } + + // Can't really do REPLACE_BLEND_COPY_FBO in ps_2_0... 
+ + if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) { + WRITE(p, " v.a = v.a * 2.0;\n"); } } + + std::string replacedAlpha = "0.0"; + char replacedAlphaTemp[64] = ""; + if (stencilToAlpha != REPLACE_ALPHA_NO) { + switch (ReplaceAlphaWithStencilType()) { + case STENCIL_VALUE_UNIFORM: + replacedAlpha = "u_stencilReplaceValue"; + break; + + case STENCIL_VALUE_ZERO: + replacedAlpha = "0.0"; + break; + + case STENCIL_VALUE_ONE: + case STENCIL_VALUE_INVERT: + // In invert, we subtract by one, but we want to output one here. + replacedAlpha = "1.0"; + break; + + case STENCIL_VALUE_INCR_4: + case STENCIL_VALUE_DECR_4: + // We're adding/subtracting, just by the smallest value in 4-bit. + snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 15.0); + replacedAlpha = replacedAlphaTemp; + break; + + case STENCIL_VALUE_INCR_8: + case STENCIL_VALUE_DECR_8: + // We're adding/subtracting, just by the smallest value in 8-bit. + snprintf(replacedAlphaTemp, sizeof(replacedAlphaTemp), "%f", 1.0 / 255.0); + replacedAlpha = replacedAlphaTemp; + break; + + case STENCIL_VALUE_KEEP: + // Do nothing. We'll mask out the alpha using color mask. + break; + } + } + + switch (stencilToAlpha) { + case REPLACE_ALPHA_DUALSOURCE: + WRITE(p, " v.a = %s;\n", replacedAlpha.c_str()); + // TODO: Output the second color as well using original v.a. + break; + + case REPLACE_ALPHA_YES: + WRITE(p, " v.a = %s;\n", replacedAlpha.c_str()); + break; + + case REPLACE_ALPHA_NO: + // Do nothing, v is already fine. 
+ break; + } + + WRITE(p, " return v;\n"); WRITE(p, "}\n"); } diff --git a/GPU/Directx9/PixelShaderGeneratorDX9.h b/GPU/Directx9/PixelShaderGeneratorDX9.h index 1d9da1a5e7..5ad26dc7db 100644 --- a/GPU/Directx9/PixelShaderGeneratorDX9.h +++ b/GPU/Directx9/PixelShaderGeneratorDX9.h @@ -49,16 +49,53 @@ struct FragmentShaderIDDX9 { void ComputeFragmentShaderIDDX9(FragmentShaderIDDX9 *id); void GenerateFragmentShaderDX9(char *buffer); +enum StencilValueType { + STENCIL_VALUE_UNIFORM, + STENCIL_VALUE_ZERO, + STENCIL_VALUE_ONE, + STENCIL_VALUE_KEEP, + STENCIL_VALUE_INVERT, + STENCIL_VALUE_INCR_4, + STENCIL_VALUE_INCR_8, + STENCIL_VALUE_DECR_4, + STENCIL_VALUE_DECR_8, +}; + +enum ReplaceAlphaType { + REPLACE_ALPHA_NO = 0, + REPLACE_ALPHA_YES = 1, + REPLACE_ALPHA_DUALSOURCE = 2, +}; + +enum ReplaceBlendType { + REPLACE_BLEND_NO, + REPLACE_BLEND_STANDARD, + REPLACE_BLEND_PRE_SRC, + REPLACE_BLEND_PRE_SRC_2X_ALPHA, + REPLACE_BLEND_2X_ALPHA, + REPLACE_BLEND_2X_SRC, + REPLACE_BLEND_COPY_FBO, +}; + bool IsAlphaTestAgainstZero(); bool IsAlphaTestTriviallyTrue(); bool IsColorTestTriviallyTrue(); +StencilValueType ReplaceAlphaWithStencilType(); +ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend); +ReplaceBlendType ReplaceBlendWithShader(); #define CONST_PS_TEXENV 0 #define CONST_PS_ALPHACOLORREF 1 #define CONST_PS_ALPHACOLORMASK 2 #define CONST_PS_FOGCOLOR 3 +#define CONST_PS_STENCILREPLACE 4 +#define CONST_PS_BLENDFIXA 5 +#define CONST_PS_BLENDFIXB 6 +#define CONST_PS_FBOTEXSIZE 7 +#define CONST_PS_TEXCLAMP 8 +#define CONST_PS_TEXCLAMPOFF 9 // For stencil upload -#define CONST_PS_STENCILVALUE 4 +#define CONST_PS_STENCILVALUE 10 }; diff --git a/GPU/Directx9/ShaderManagerDX9.cpp b/GPU/Directx9/ShaderManagerDX9.cpp index ca368fa834..ce1b4d2eaf 100644 --- a/GPU/Directx9/ShaderManagerDX9.cpp +++ b/GPU/Directx9/ShaderManagerDX9.cpp @@ -26,6 +26,7 @@ #include "util/text/utf8.h" #include "Common/Common.h" +#include "Core/Config.h" #include "Core/Reporting.h" #include 
"GPU/Math3D.h" #include "GPU/GPUState.h" @@ -137,6 +138,19 @@ void ShaderManagerDX9::PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha) pD3Ddevice->SetPixelShaderConstantF(creg, col, 1); } +void ShaderManagerDX9::PSSetFloat(int creg, float value) { + const float f[4] = { value, 0.0f, 0.0f, 0.0f }; + pD3Ddevice->SetPixelShaderConstantF(creg, f, 1); +} + +void ShaderManagerDX9::PSSetFloatArray(int creg, const float *value, int count) { + float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f }; + for (int i = 0; i < count; i++) { + f[i] = value[i]; + } + pD3Ddevice->SetPixelShaderConstantF(creg, f, 1); +} + void ShaderManagerDX9::VSSetFloat(int creg, float value) { const float f[4] = { value, 0.0f, 0.0f, 0.0f }; pD3Ddevice->SetVertexShaderConstantF(creg, f, 1); @@ -231,6 +245,43 @@ void ShaderManagerDX9::PSUpdateUniforms(int dirtyUniforms) { if (dirtyUniforms & DIRTY_FOGCOLOR) { PSSetColorUniform3(CONST_PS_FOGCOLOR, gstate.fogcolor); } + if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) { + PSSetFloat(CONST_PS_STENCILREPLACE, (float)gstate.getStencilTestRef() * (1.0f / 255.0f)); + } + + if (dirtyUniforms & DIRTY_SHADERBLEND) { + PSSetColorUniform3(CONST_PS_BLENDFIXA, gstate.getFixA()); + PSSetColorUniform3(CONST_PS_BLENDFIXB, gstate.getFixB()); + + const float fbotexSize[2] = { + 1.0f / (float)gstate_c.curRTRenderWidth, + 1.0f / (float)gstate_c.curRTRenderHeight, + }; + PSSetFloatArray(CONST_PS_FBOTEXSIZE, fbotexSize, 2); + } + + if (dirtyUniforms & DIRTY_TEXCLAMP) { + const float invW = 1.0f / (float)gstate_c.curTextureWidth; + const float invH = 1.0f / (float)gstate_c.curTextureHeight; + const int w = gstate.getTextureWidth(0); + const int h = gstate.getTextureHeight(0); + const float widthFactor = (float)w * invW; + const float heightFactor = (float)h * invH; + + // First wrap xy, then half texel xy (for clamp.) 
+ const float texclamp[4] = { + widthFactor, + heightFactor, + invW * 0.5f, + invH * 0.5f, + }; + const float texclampoff[2] = { + gstate_c.curTextureXOffset * invW, + gstate_c.curTextureYOffset * invH, + }; + PSSetFloatArray(CONST_PS_TEXCLAMP, texclamp, 4); + PSSetFloatArray(CONST_PS_TEXCLAMPOFF, texclampoff, 2); + } } void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { @@ -278,6 +329,7 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { getFloat24(gstate.fog1), getFloat24(gstate.fog2), }; + // TODO: Handle NAN/INF? VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2); } // TODO: Could even set all bones in one go if they're all dirty. @@ -315,31 +367,58 @@ void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) { // Texturing if (dirtyUniforms & DIRTY_UVSCALEOFFSET) { + const float invW = 1.0f / (float)gstate_c.curTextureWidth; + const float invH = 1.0f / (float)gstate_c.curTextureHeight; + const int w = gstate.getTextureWidth(0); + const int h = gstate.getTextureHeight(0); + const float widthFactor = (float)w * invW; + const float heightFactor = (float)h * invH; + float uvscaleoff[4]; - if (gstate.isModeThrough()) { - // We never get here because we don't use HW transform with through mode. - // Although - why don't we? - uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth; - uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight; - uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth; - uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight; - } else { - int w = gstate.getTextureWidth(0); - int h = gstate.getTextureHeight(0); - float widthFactor = (float)w / (float)gstate_c.curTextureWidth; - float heightFactor = (float)h / (float)gstate_c.curTextureHeight; + + switch (gstate.getUVGenMode()) { + case GE_TEXMAP_TEXTURE_COORDS: // Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works. 
- if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN) { - uvscaleoff[0] = gstate_c.uv.uScale * widthFactor; - uvscaleoff[1] = gstate_c.uv.vScale * heightFactor; - uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; - uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; - } else { + case GE_TEXMAP_UNKNOWN: + if (g_Config.bPrescaleUV) { + // Shouldn't even get here as we won't use the uniform in the shader. + // We are here but are prescaling UV in the decoder? Let's do the same as in the other case + // except consider *Scale and *Off to be 1 and 0. uvscaleoff[0] = widthFactor; uvscaleoff[1] = heightFactor; uvscaleoff[2] = 0.0f; uvscaleoff[3] = 0.0f; + } else { + uvscaleoff[0] = gstate_c.uv.uScale * widthFactor; + uvscaleoff[1] = gstate_c.uv.vScale * heightFactor; + uvscaleoff[2] = gstate_c.uv.uOff * widthFactor; + uvscaleoff[3] = gstate_c.uv.vOff * heightFactor; } + break; + + // These two work the same whether or not we prescale UV. + + case GE_TEXMAP_TEXTURE_MATRIX: + // We cannot bake the UV coord scale factor in here, as we apply a matrix multiplication + // before this is applied, and the matrix multiplication may contain translation. In this case + // the translation will be scaled which breaks faces in Hexyz Force for example. + // So I've gone back to applying the scale factor in the shader. + uvscaleoff[0] = widthFactor; + uvscaleoff[1] = heightFactor; + uvscaleoff[2] = 0.0f; + uvscaleoff[3] = 0.0f; + break; + + case GE_TEXMAP_ENVIRONMENT_MAP: + // In this mode we only use uvscaleoff to scale to the texture size. 
+ uvscaleoff[0] = widthFactor; + uvscaleoff[1] = heightFactor; + uvscaleoff[2] = 0.0f; + uvscaleoff[3] = 0.0f; + break; + + default: + ERROR_LOG_REPORT(G3D, "Unexpected UV gen mode: %d", gstate.getUVGenMode()); } VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4); } diff --git a/GPU/Directx9/ShaderManagerDX9.h b/GPU/Directx9/ShaderManagerDX9.h index 88cbc9064e..7ad723aac4 100644 --- a/GPU/Directx9/ShaderManagerDX9.h +++ b/GPU/Directx9/ShaderManagerDX9.h @@ -53,7 +53,7 @@ enum { DIRTY_MATEMISSIVE = (1 << 14), DIRTY_AMBIENT = (1 << 15), DIRTY_MATAMBIENTALPHA = (1 << 16), - DIRTY_MATERIAL = (1 << 17), // let's set all 4 together (emissive ambient diffuse specular). We hide specular coef in specular.a + DIRTY_SHADERBLEND = (1 << 17), // Used only for in-shader blending. DIRTY_UVSCALEOFFSET = (1 << 18), // this will be dirtied ALL THE TIME... maybe we'll need to do "last value with this shader compares" DIRTY_TEXCLAMP = (1 << 19), @@ -131,6 +131,8 @@ private: void VSUpdateUniforms(int dirtyUniforms); void PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha); void PSSetColorUniform3(int creg, u32 color); + void PSSetFloat(int creg, float value); + void PSSetFloatArray(int creg, const float *value, int count); void VSSetMatrix4x3(int creg, const float *m4x3); void VSSetMatrix4x3_3(int creg, const float *m4x3); diff --git a/GPU/Directx9/StateMappingDX9.cpp b/GPU/Directx9/StateMappingDX9.cpp index c84b936a61..8c6dd6e964 100644 --- a/GPU/Directx9/StateMappingDX9.cpp +++ b/GPU/Directx9/StateMappingDX9.cpp @@ -78,11 +78,6 @@ static const D3DCMPFUNC ztests[] = { D3DCMP_LESS, D3DCMP_LESSEQUAL, D3DCMP_GREATER, D3DCMP_GREATEREQUAL, }; -static const D3DCMPFUNC ztests_backwards[] = { - D3DCMP_NEVER, D3DCMP_ALWAYS, D3DCMP_EQUAL, D3DCMP_NOTEQUAL, - D3DCMP_GREATER, D3DCMP_GREATEREQUAL, D3DCMP_LESS, D3DCMP_LESSEQUAL, -}; - static const D3DSTENCILOP stencilOps[] = { D3DSTENCILOP_KEEP, D3DSTENCILOP_ZERO, @@ -94,7 +89,21 @@ static const D3DSTENCILOP stencilOps[] = { 
D3DSTENCILOP_KEEP, // reserved }; -static u32 blendColor2Func(u32 fix) { +static D3DBLEND toDualSource(D3DBLEND blendfunc) { + switch (blendfunc) { + // TODO +#if 0 + case D3DBLEND_SRCALPHA: + return D3DBLEND_SRCCOLOR2; + case D3DBLEND_INVSRCALPHA: + return D3DBLEND_INVSRCCOLOR2; +#endif + default: + return blendfunc; + } +} + +static D3DBLEND blendColor2Func(u32 fix) { if (fix == 0xFFFFFF) return D3DBLEND_ONE; if (fix == 0) @@ -108,13 +117,339 @@ static u32 blendColor2Func(u32 fix) { return D3DBLEND_UNK; } -static bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margin = 0.1f) { +static inline bool blendColorSimilar(const Vec3f &a, const Vec3f &b, float margin = 0.1f) { const Vec3f diff = a - b; if (fabsf(diff.x) <= margin && fabsf(diff.y) <= margin && fabsf(diff.z) <= margin) return true; return false; } +bool TransformDrawEngineDX9::ApplyShaderBlending() { + bool skipBlit = false; + + static const int MAX_REASONABLE_BLITS_PER_FRAME = 24; + + static int lastFrameBlit = -1; + static int blitsThisFrame = 0; + if (lastFrameBlit != gpuStats.numFlips) { + if (blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME) { + WARN_LOG_REPORT_ONCE(blendingBlit, G3D, "Lots of blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq()); + } + blitsThisFrame = 0; + lastFrameBlit = gpuStats.numFlips; + } + ++blitsThisFrame; + if (blitsThisFrame > MAX_REASONABLE_BLITS_PER_FRAME * 2) { + WARN_LOG_ONCE(blendingBlit2, G3D, "Skipping additional blits needed for obscure blending: %d per frame, blend %d/%d/%d", blitsThisFrame, gstate.getBlendFuncA(), gstate.getBlendFuncB(), gstate.getBlendEq()); + ResetShaderBlending(); + return false; + } + + framebufferManager_->BindFramebufferColor(1, nullptr, false); + // If we are rendering at a higher resolution, linear is probably best for the dest color. 
+ pD3Ddevice->SetSamplerState(1, D3DSAMP_MAGFILTER, D3DTEXF_LINEAR); + pD3Ddevice->SetSamplerState(1, D3DSAMP_MINFILTER, D3DTEXF_LINEAR); + fboTexBound_ = true; + + shaderManager_->DirtyUniform(DIRTY_SHADERBLEND); + return true; +} + +inline void TransformDrawEngineDX9::ResetShaderBlending() { + if (fboTexBound_) { + pD3Ddevice->SetTexture(1, nullptr); + fboTexBound_ = false; + } +} + +void TransformDrawEngineDX9::ApplyStencilReplaceOnly() { + // We're not blending, but we may still want to blend for stencil. + // This is only useful for INCR/DECR/INVERT. Others can write directly. + switch (ReplaceAlphaWithStencilType()) { + case STENCIL_VALUE_INCR_4: + case STENCIL_VALUE_INCR_8: + // We'll add the incremented value output by the shader. + dxstate.blendFunc.set(D3DBLEND_ONE, D3DBLEND_ZERO, D3DBLEND_ONE, D3DBLEND_ONE); + dxstate.blendEquation.set(D3DBLENDOP_ADD, D3DBLENDOP_ADD); + dxstate.blend.enable(); + dxstate.blendSeparate.enable(); + break; + + case STENCIL_VALUE_DECR_4: + case STENCIL_VALUE_DECR_8: + // We'll subtract the incremented value output by the shader. + dxstate.blendFunc.set(D3DBLEND_ONE, D3DBLEND_ZERO, D3DBLEND_ONE, D3DBLEND_ONE); + dxstate.blendEquation.set(D3DBLENDOP_ADD, D3DBLENDOP_SUBTRACT); + dxstate.blend.enable(); + dxstate.blendSeparate.enable(); + break; + + case STENCIL_VALUE_INVERT: + // The shader will output one, and reverse subtracting will essentially invert. + dxstate.blendFunc.set(D3DBLEND_ONE, D3DBLEND_ZERO, D3DBLEND_ONE, D3DBLEND_ONE); + dxstate.blendEquation.set(D3DBLENDOP_ADD, D3DBLENDOP_REVSUBTRACT); + dxstate.blend.enable(); + dxstate.blendSeparate.enable(); + break; + + default: + dxstate.blend.disable(); + break; + } +} + +void TransformDrawEngineDX9::ApplyBlendState() { + // Blending is a bit complex to emulate. This is due to several reasons: + // + // * Doubled blend modes (src, dst, inversed) aren't supported in Direct3D. + // If possible, we double the src color or src alpha in the shader to account for these. 
+ // These may clip incorrectly, so we avoid unfortunately. + // * Direct3D only has one arbitrary fixed color. We premultiply the other in the shader. + // * The written output alpha should actually be the stencil value. Alpha is not written. + // + // If we can't apply blending, we make a copy of the framebuffer and do it manually. + + // Unfortunately, we can't really do this in Direct3D 9... + gstate_c.allowShaderBlend = false; + + ReplaceBlendType replaceBlend = ReplaceBlendWithShader(); + ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend); + bool usePreSrc = false; + + switch (replaceBlend) { + case REPLACE_BLEND_NO: + ResetShaderBlending(); + // We may still want to do something about stencil -> alpha. + if (replaceAlphaWithStencil == REPLACE_ALPHA_YES) { + ApplyStencilReplaceOnly(); + } else { + dxstate.blend.disable(); + } + return; + + case REPLACE_BLEND_COPY_FBO: + if (ApplyShaderBlending()) { + // We may still want to do something about stencil -> alpha. + if (replaceAlphaWithStencil == REPLACE_ALPHA_YES) { + ApplyStencilReplaceOnly(); + } else { + // None of the below logic is interesting, we're gonna do it entirely in the shader. + dxstate.blend.disable(); + } + return; + } + // Until next time, force it off. 
+ gstate_c.allowShaderBlend = false; + break; + + case REPLACE_BLEND_PRE_SRC: + case REPLACE_BLEND_PRE_SRC_2X_ALPHA: + usePreSrc = true; + break; + + case REPLACE_BLEND_STANDARD: + case REPLACE_BLEND_2X_ALPHA: + case REPLACE_BLEND_2X_SRC: + break; + } + + dxstate.blend.enable(); + dxstate.blendSeparate.enable(); + ResetShaderBlending(); + + GEBlendMode blendFuncEq = gstate.getBlendEq(); + int blendFuncA = gstate.getBlendFuncA(); + int blendFuncB = gstate.getBlendFuncB(); + if (blendFuncA > GE_SRCBLEND_FIXA) + blendFuncA = GE_SRCBLEND_FIXA; + if (blendFuncB > GE_DSTBLEND_FIXB) + blendFuncB = GE_DSTBLEND_FIXB; + + float constantAlpha = 1.0f; + if (gstate.isStencilTestEnabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) { + switch (ReplaceAlphaWithStencilType()) { + case STENCIL_VALUE_UNIFORM: + constantAlpha = (float) gstate.getStencilTestRef() * (1.0f / 255.0f); + break; + + case STENCIL_VALUE_INCR_4: + case STENCIL_VALUE_DECR_4: + constantAlpha = 1.0f / 15.0f; + break; + + case STENCIL_VALUE_INCR_8: + case STENCIL_VALUE_DECR_8: + constantAlpha = 1.0f / 255.0f; + break; + + default: + break; + } + } + + // Shortcut by using D3DBLEND_ONE where possible, no need to set blendcolor + D3DBLEND glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA]; + D3DBLEND glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB]; + + if (usePreSrc) { + glBlendFuncA = D3DBLEND_ONE; + // Need to pull in the fixed color. 
+ if (blendFuncA == GE_SRCBLEND_FIXA) { + shaderManager_->DirtyUniform(DIRTY_SHADERBLEND); + } + } + + if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) { + glBlendFuncA = toDualSource(glBlendFuncA); + glBlendFuncB = toDualSource(glBlendFuncB); + } + + if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) { + Vec3f fixA = Vec3f::FromRGB(gstate.getFixA()); + Vec3f fixB = Vec3f::FromRGB(gstate.getFixB()); + if (glBlendFuncA == D3DBLEND_UNK && glBlendFuncB != D3DBLEND_UNK) { + // Can use blendcolor trivially. + const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + glBlendFuncA = D3DBLEND_BLENDFACTOR; + } else if (glBlendFuncA != D3DBLEND_UNK && glBlendFuncB == D3DBLEND_UNK) { + // Can use blendcolor trivially. + const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + glBlendFuncB = D3DBLEND_BLENDFACTOR; + } else if (glBlendFuncA == D3DBLEND_UNK && glBlendFuncB == D3DBLEND_UNK) { + if (blendColorSimilar(fixA, Vec3f::AssignToAll(constantAlpha) - fixB)) { + glBlendFuncA = D3DBLEND_BLENDFACTOR; + glBlendFuncB = D3DBLEND_INVBLENDFACTOR; + const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + } else if (blendColorSimilar(fixA, fixB)) { + glBlendFuncA = D3DBLEND_BLENDFACTOR; + glBlendFuncB = D3DBLEND_BLENDFACTOR; + const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + } else { + static bool didReportBlend = false; + if (!didReportBlend) + Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); + didReportBlend = true; + + DEBUG_LOG(G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); + // Let's approximate, 
at least. Close is better than totally off. + const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f); + const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f); + if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) { + glBlendFuncA = nearZeroA ? D3DBLEND_ZERO : D3DBLEND_ONE; + glBlendFuncB = D3DBLEND_BLENDFACTOR; + const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + // We need to pick something. Let's go with A as the fixed color. + } else { + glBlendFuncA = D3DBLEND_BLENDFACTOR; + glBlendFuncB = nearZeroB ? D3DBLEND_ZERO : D3DBLEND_ONE; + const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + } + } + } else { + // We optimized both, but that's probably not necessary, so let's pick one to be constant. + // For now let's just pick whichever was fixed instead of checking error. + if (blendFuncA == GE_SRCBLEND_FIXA && !usePreSrc) { + glBlendFuncA = D3DBLEND_BLENDFACTOR; + const float blendColor[4] = {fixA.x, fixA.y, fixA.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + } else { + glBlendFuncB = D3DBLEND_BLENDFACTOR; + const float blendColor[4] = {fixB.x, fixB.y, fixB.z, constantAlpha}; + dxstate.blendColor.set(blendColor); + } + } + } else if (constantAlpha < 1.0f) { + const float blendColor[4] = {1.0f, 1.0f, 1.0f, constantAlpha}; + dxstate.blendColor.set(blendColor); + } + + // At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow. + + // The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't + // do any blending in the alpha channel as that doesn't seem to happen on PSP. So lacking a better option, + // the only value we can set alpha to here without multipass and dual source alpha is zero (by setting + // the factors to zero). So let's do that. 
+ D3DBLENDOP alphaEq = D3DBLENDOP_ADD; + if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) { + // Let the fragment shader take care of it. + switch (ReplaceAlphaWithStencilType()) { + case STENCIL_VALUE_INCR_4: + case STENCIL_VALUE_INCR_8: + // We'll add the increment value. + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ONE); + break; + + case STENCIL_VALUE_DECR_4: + case STENCIL_VALUE_DECR_8: + // Like add with a small value, but subtracting. + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ONE); + alphaEq = D3DBLENDOP_SUBTRACT; + break; + + case STENCIL_VALUE_INVERT: + // This will subtract by one, effectively inverting the bits. + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ONE); + alphaEq = D3DBLENDOP_REVSUBTRACT; + break; + + default: + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ZERO); + break; + } + } else if (gstate.isStencilTestEnabled()) { + switch (ReplaceAlphaWithStencilType()) { + case STENCIL_VALUE_KEEP: + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ZERO, D3DBLEND_ONE); + break; + case STENCIL_VALUE_ONE: + // This won't give one but it's our best shot... + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ONE); + break; + case STENCIL_VALUE_ZERO: + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ZERO, D3DBLEND_ZERO); + break; + case STENCIL_VALUE_UNIFORM: + // This won't give a correct value (it multiplies) but it may be better than random values. + if (constantAlpha < 1.0f) { + // TODO: Does this work as the alpha component of the fixed blend factor? + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_BLENDFACTOR, D3DBLEND_ZERO); + } else { + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ZERO); + } + break; + case STENCIL_VALUE_INCR_4: + case STENCIL_VALUE_INCR_8: + // This won't give a correct value always, but it will try to increase at least. 
+ // TODO: Does this work as the alpha component of the fixed blend factor? + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_BLENDFACTOR, D3DBLEND_ONE); + break; + case STENCIL_VALUE_DECR_4: + case STENCIL_VALUE_DECR_8: + // This won't give a correct value always, but it will try to decrease at least. + // TODO: Does this work as the alpha component of the fixed blend factor? + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_BLENDFACTOR, D3DBLEND_ONE); + alphaEq = D3DBLENDOP_SUBTRACT; + break; + case STENCIL_VALUE_INVERT: + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ONE); + // If the output alpha is near 1, this will basically invert. It's our best shot. + alphaEq = D3DBLENDOP_REVSUBTRACT; + break; + } + } else { + // Retain the existing value when stencil testing is off. + dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ZERO, D3DBLEND_ONE); + } + + dxstate.blendEquation.set(eqLookup[blendFuncEq], alphaEq); +} + void TransformDrawEngineDX9::ApplyDrawState(int prim) { // TODO: All this setup is soon so expensive that we'll need dirty flags, or simply do it in the command writes where we detect dirty by xoring. Silly to do all this work on every drawcall. @@ -128,82 +463,8 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { } } - // TODO: The top bit of the alpha channel should be written to the stencil bit somehow. This appears to require very expensive multipass rendering :( Alternatively, one could do a - // single fullscreen pass that converts alpha to stencil (or 2 passes, to set both the 0 and 1 values) very easily. - - // Set blend - bool wantBlend = !gstate.isModeClear() && gstate.isAlphaBlendEnabled(); - dxstate.blend.set(wantBlend); - if (wantBlend) { - // This can't be done exactly as there are several PSP blend modes that are impossible to do on OpenGL ES 2.0, and some even on regular OpenGL for desktop. 
- // HOWEVER - we should be able to approximate the 2x modes in the shader, although they will clip wrongly. - - // Examples of seen unimplementable blend states: - // Mortal Kombat Unchained: FixA=0000ff FixB=000080 FuncA=10 FuncB=10 - - int blendFuncA = gstate.getBlendFuncA(); - int blendFuncB = gstate.getBlendFuncB(); - int blendFuncEq = gstate.getBlendEq(); - if (blendFuncA > GE_SRCBLEND_FIXA) blendFuncA = GE_SRCBLEND_FIXA; - if (blendFuncB > GE_DSTBLEND_FIXB) blendFuncB = GE_DSTBLEND_FIXB; - - // Shortcut by using D3DBLEND_ONE where possible, no need to set blendcolor - u32 glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(gstate.getFixA()) : aLookup[blendFuncA]; - u32 glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(gstate.getFixB()) : bLookup[blendFuncB]; - if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) { - Vec3f fixA = Vec3f::FromRGB(gstate.getFixA()); - Vec3f fixB = Vec3f::FromRGB(gstate.getFixB()); - if (glBlendFuncA == D3DBLEND_UNK && glBlendFuncB != D3DBLEND_UNK) { - // Can use blendcolor trivially. - const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; - dxstate.blendColor.set(blendColor); - glBlendFuncA = D3DBLEND_BLENDFACTOR; - } else if (glBlendFuncA != D3DBLEND_UNK && glBlendFuncB == D3DBLEND_UNK) { - // Can use blendcolor trivially. 
- const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f}; - dxstate.blendColor.set(blendColor); - glBlendFuncB = D3DBLEND_BLENDFACTOR; - } else if (glBlendFuncA == D3DBLEND_UNK && glBlendFuncB == D3DBLEND_UNK) { - if (blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f) - fixB)) { - glBlendFuncA = D3DBLEND_BLENDFACTOR; - glBlendFuncB = D3DBLEND_INVBLENDFACTOR; - const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; - dxstate.blendColor.set(blendColor); - } else if (blendColorSimilar(fixA, fixB)) { - glBlendFuncA = D3DBLEND_BLENDFACTOR; - glBlendFuncB = D3DBLEND_BLENDFACTOR; - const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; - dxstate.blendColor.set(blendColor); - } else { - static bool didReportBlend = false; - if (!didReportBlend) - Reporting::ReportMessage("ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); - didReportBlend = true; - - DEBUG_LOG(HLE, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", gstate.getFixA(), gstate.getFixB(), gstate.getBlendFuncA(), gstate.getBlendFuncB()); - // Let's approximate, at least. Close is better than totally off. - const bool nearZeroA = blendColorSimilar(fixA, Vec3f::AssignToAll(0.0f), 0.25f); - const bool nearZeroB = blendColorSimilar(fixB, Vec3f::AssignToAll(0.0f), 0.25f); - if (nearZeroA || blendColorSimilar(fixA, Vec3f::AssignToAll(1.0f), 0.25f)) { - glBlendFuncA = nearZeroA ? D3DBLEND_ZERO : D3DBLEND_ONE; - glBlendFuncB = D3DBLEND_BLENDFACTOR; - const float blendColor[4] = {fixB.x, fixB.y, fixB.z, 1.0f}; - dxstate.blendColor.set(blendColor); - // We need to pick something. Let's go with A as the fixed color. - } else { - glBlendFuncA = D3DBLEND_BLENDFACTOR; - glBlendFuncB = nearZeroB ? 
D3DBLEND_ZERO : D3DBLEND_ONE; - const float blendColor[4] = {fixA.x, fixA.y, fixA.z, 1.0f}; - dxstate.blendColor.set(blendColor); - } - } - } - } - - // At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow. - dxstate.blendFunc.set(glBlendFuncA, glBlendFuncB, D3DBLEND_ONE, D3DBLEND_ZERO); - dxstate.blendEquation.set(eqLookup[blendFuncEq], D3DBLENDOP_ADD); - } + // Set blend - unless we need to do it in the shader. + ApplyBlendState(); // Set Dither if (gstate.isDitherEnabled()) { @@ -222,10 +483,13 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { dxstate.depthTest.enable(); dxstate.depthFunc.set(D3DCMP_ALWAYS); dxstate.depthWrite.set(gstate.isClearModeDepthMask()); + if (gstate.isClearModeDepthMask()) { + framebufferManager_->SetDepthUpdated(); + } // Color Test - bool colorMask = (gstate.clearmode >> 8) & 1; - bool alphaMask = (gstate.clearmode >> 9) & 1; + bool colorMask = gstate.isClearModeColorMask(); + bool alphaMask = gstate.isClearModeAlphaMask(); dxstate.colorMask.set(colorMask, colorMask, colorMask, alphaMask); // Stencil Test @@ -248,6 +512,9 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { dxstate.depthTest.enable(); dxstate.depthFunc.set(ztests[gstate.getDepthTestFunction()]); dxstate.depthWrite.set(gstate.isDepthWriteEnabled()); + if (gstate.isDepthWriteEnabled()) { + framebufferManager_->SetDepthUpdated(); + } } else { dxstate.depthTest.disable(); } @@ -273,6 +540,16 @@ void TransformDrawEngineDX9::ApplyDrawState(int prim) { } #endif + // Let's not write to alpha if stencil isn't enabled. + if (!gstate.isStencilTestEnabled()) { + amask = false; + } else { + // If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel. 
+ if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) { + amask = false; + } + } + dxstate.colorMask.set(rmask, gmask, bmask, amask); // Stencil Test diff --git a/GPU/Directx9/TransformPipelineDX9.cpp b/GPU/Directx9/TransformPipelineDX9.cpp index 51b6872b34..986f758e6b 100644 --- a/GPU/Directx9/TransformPipelineDX9.cpp +++ b/GPU/Directx9/TransformPipelineDX9.cpp @@ -95,7 +95,8 @@ TransformDrawEngineDX9::TransformDrawEngineDX9() vertexCountInDrawCalls(0), decodeCounter_(0), dcid_(0), - uvScale(0) { + uvScale(0), + fboTexBound_(false) { memset(&decOptions_, 0, sizeof(decOptions_)); decOptions_.expandAllUVtoFloat = true; diff --git a/GPU/Directx9/TransformPipelineDX9.h b/GPU/Directx9/TransformPipelineDX9.h index a5fd2f064c..74a2db807a 100644 --- a/GPU/Directx9/TransformPipelineDX9.h +++ b/GPU/Directx9/TransformPipelineDX9.h @@ -182,6 +182,10 @@ private: void ApplyDrawState(int prim); void ApplyDrawStateLate(); + void ApplyBlendState(); + void ApplyStencilReplaceOnly(); + bool ApplyShaderBlending(); + inline void ResetShaderBlending(); IDirect3DVertexDeclaration9 *SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt); @@ -241,6 +245,7 @@ private: UVScale *uvScale; + bool fboTexBound_; VertexDecoderOptions decOptions_; }; diff --git a/GPU/Directx9/helper/dx_state.cpp b/GPU/Directx9/helper/dx_state.cpp index 7dc86e15ce..4cea4b4870 100644 --- a/GPU/Directx9/helper/dx_state.cpp +++ b/GPU/Directx9/helper/dx_state.cpp @@ -21,6 +21,7 @@ void DirectxState::Restore() { int count = 0; blend.restore(); count++; + blendSeparate.restore(); count++; blendEquation.restore(); count++; blendFunc.restore(); count++; blendColor.restore(); count++; diff --git a/GPU/Directx9/helper/dx_state.h b/GPU/Directx9/helper/dx_state.h index 653d134e86..0c76aff301 100644 --- a/GPU/Directx9/helper/dx_state.h +++ b/GPU/Directx9/helper/dx_state.h @@ -355,6 +355,7 @@ public: // When adding a state here, don't forget to add it to DirectxState::Restore() too BoolState blend; + 
BoolState blendSeparate; DxState4 blendFunc; DxState2 blendEquation; SavedBlendFactor blendColor; diff --git a/GPU/Directx9/helper/global.h b/GPU/Directx9/helper/global.h index cc0a37f1b1..6f44f2864b 100644 --- a/GPU/Directx9/helper/global.h +++ b/GPU/Directx9/helper/global.h @@ -31,6 +31,6 @@ bool CompileVertexShader(const char *code, LPDIRECT3DVERTEXSHADER9 *pShader, ID3 void DestroyShaders(); void DirectxInit(HWND window); -#define D3DBLEND_UNK D3DSTENCILOP_FORCE_DWORD +#define D3DBLEND_UNK D3DBLEND_FORCE_DWORD }; diff --git a/GPU/GLES/FragmentShaderGenerator.cpp b/GPU/GLES/FragmentShaderGenerator.cpp index 22c62d2601..ab49f23ebe 100644 --- a/GPU/GLES/FragmentShaderGenerator.cpp +++ b/GPU/GLES/FragmentShaderGenerator.cpp @@ -665,7 +665,6 @@ void GenerateFragmentShader(char *buffer) { } else { ucoord = "mod(" + ucoord + ", u_texclamp.x)"; } - // The v coordinate is more tricky, since it's flipped. if (gstate.isTexCoordClampedT()) { vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)"; } else { diff --git a/GPU/GLES/Framebuffer.cpp b/GPU/GLES/Framebuffer.cpp index d17ed80f66..272fd636df 100644 --- a/GPU/GLES/Framebuffer.cpp +++ b/GPU/GLES/Framebuffer.cpp @@ -920,13 +920,18 @@ FBO *FramebufferManager::GetTempFBO(u16 w, u16 h, FBOColorDepth depth) { return fbo; } -void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy) { +void FramebufferManager::BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy) { if (framebuffer == NULL) { framebuffer = currentRenderVfb_; } + if (stage != GL_TEXTURE0) { + glActiveTexture(stage); + } + if (!framebuffer->fbo || !useBufferedRendering_) { glBindTexture(GL_TEXTURE_2D, 0); + glActiveTexture(GL_TEXTURE0); gstate_c.skipDrawReason |= SKIPDRAW_BAD_FB_TEXTURE; return; } @@ -952,6 +957,10 @@ void FramebufferManager::BindFramebufferColor(VirtualFramebuffer *framebuffer, b } else { fbo_bind_color_as_texture(framebuffer->fbo, 0); } + + if (stage != 
GL_TEXTURE1) { + glActiveTexture(stage); + } } void FramebufferManager::CopyDisplayToOutput() { diff --git a/GPU/GLES/Framebuffer.h b/GPU/GLES/Framebuffer.h index f67adfff51..ad0c3162ad 100644 --- a/GPU/GLES/Framebuffer.h +++ b/GPU/GLES/Framebuffer.h @@ -94,7 +94,7 @@ public: void BlitFramebufferDepth(VirtualFramebuffer *src, VirtualFramebuffer *dst); // For use when texturing from a framebuffer. May create a duplicate if target. - void BindFramebufferColor(VirtualFramebuffer *framebuffer, bool skipCopy = false); + void BindFramebufferColor(int stage, VirtualFramebuffer *framebuffer, bool skipCopy = false); // Reads a rectangular subregion of a framebuffer to the right position in its backing memory. virtual void ReadFramebufferToMemory(VirtualFramebuffer *vfb, bool sync, int x, int y, int w, int h) override; diff --git a/GPU/GLES/ShaderManager.cpp b/GPU/GLES/ShaderManager.cpp index a03d4d30bc..e030067531 100644 --- a/GPU/GLES/ShaderManager.cpp +++ b/GPU/GLES/ShaderManager.cpp @@ -416,7 +416,7 @@ void LinkedShader::UpdateUniforms(u32 vertType) { if (my_isinf(fogcoef[1])) { // not really sure what a sensible value might be. fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f; - } else if (my_isnan(fogcoef[1])) { + } else if (my_isnan(fogcoef[1])) { // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Just put the fog far away at a large finite distance. // Infinities and NaNs are rather unpredictable in shaders on many GPUs diff --git a/GPU/GLES/StateMapping.cpp b/GPU/GLES/StateMapping.cpp index 87b158b24e..1c4c0b214e 100644 --- a/GPU/GLES/StateMapping.cpp +++ b/GPU/GLES/StateMapping.cpp @@ -187,8 +187,8 @@ bool TransformDrawEngine::ApplyShaderBlending() { return false; } + framebufferManager_->BindFramebufferColor(GL_TEXTURE1, NULL); glActiveTexture(GL_TEXTURE1); - framebufferManager_->BindFramebufferColor(NULL); // If we are rendering at a higher resolution, linear is probably best for the dest color. 
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); diff --git a/GPU/GLES/TextureCache.cpp b/GPU/GLES/TextureCache.cpp index a36d1fb499..3e18142685 100644 --- a/GPU/GLES/TextureCache.cpp +++ b/GPU/GLES/TextureCache.cpp @@ -1052,7 +1052,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffe glBindTexture(GL_TEXTURE_2D, clutTexture); glActiveTexture(GL_TEXTURE0); - framebufferManager_->BindFramebufferColor(framebuffer, true); + framebufferManager_->BindFramebufferColor(GL_TEXTURE0, framebuffer, true); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); @@ -1087,7 +1087,7 @@ void TextureCache::SetTextureFramebuffer(TexCacheEntry *entry, VirtualFramebuffe gstate_c.textureSimpleAlpha = alphaStatus == TexCacheEntry::STATUS_ALPHA_SIMPLE; } else { entry->status &= ~TexCacheEntry::STATUS_DEPALETTIZE; - framebufferManager_->BindFramebufferColor(framebuffer); + framebufferManager_->BindFramebufferColor(GL_TEXTURE0, framebuffer); gstate_c.textureFullAlpha = framebuffer->drawnFormat == GE_FORMAT_565; gstate_c.textureSimpleAlpha = gstate_c.textureFullAlpha; diff --git a/Windows/D3D9Base.cpp b/Windows/D3D9Base.cpp index 899efe7409..069f643f9d 100644 --- a/Windows/D3D9Base.cpp +++ b/Windows/D3D9Base.cpp @@ -68,8 +68,6 @@ bool D3D9_Init(HWND hWnd, bool windowed, std::string *error_message) { g_pfnCreate9ex = (DIRECT3DCREATE9EX)GetProcAddress(hD3D9, "Direct3DCreate9Ex"); has9Ex = (g_pfnCreate9ex != NULL); - has9Ex = false; - if (has9Ex) { HRESULT result = g_pfnCreate9ex(D3D_SDK_VERSION, &d3dEx); d3d = d3dEx; @@ -143,8 +141,9 @@ bool D3D9_Init(HWND hWnd, bool windowed, std::string *error_message) { if (has9Ex) { if (windowed && IsWin7OrLater()) { // This new flip mode gives higher performance. - pp.BackBufferCount = 2; - pp.SwapEffect = D3DSWAPEFFECT_FLIPEX; + // TODO: This makes it slower? 
+ //pp.BackBufferCount = 2; + //pp.SwapEffect = D3DSWAPEFFECT_FLIPEX; } hr = d3dEx->CreateDeviceEx(adapterId, D3DDEVTYPE_HAL, hWnd, dwBehaviorFlags, &pp, NULL, &deviceEx); device = deviceEx; @@ -172,7 +171,8 @@ bool D3D9_Init(HWND hWnd, bool windowed, std::string *error_message) { DX9::fbo_init(d3d); if (deviceEx && IsWin7OrLater()) { - deviceEx->SetMaximumFrameLatency(1); + // TODO: This makes it slower? + //deviceEx->SetMaximumFrameLatency(1); } return true;