Implement dual source blending to avoid unneeded alpha pass.

This implementation does not work on Windows XP (sorry, there is no support for dual source blending there).
This should improve speed on older hardware, or on newer hardware when using supersampling.
Disable the partial fix for 4x supersampling, as I'm interested in finding the original issue with the implementation so that it can be fixed correctly.
Remove the deprecation label from the plugin while I'm working on it.
This commit is contained in:
Rodolfo Bogado 2013-03-28 20:08:51 -03:00
parent fb28349056
commit 40d919b352
5 changed files with 92 additions and 44 deletions

View File

@ -848,8 +848,17 @@ const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType
// single pass
if (dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND)
{
// Colors will be blended against the alpha from ocol1...
WRITE(p, "\tocol1 = prev;\n");
if(ApiType & API_D3D9)
{
//Colors will be blended against the color from ocol1 in D3D 9...
//ALPHA must be 0 or the shader will not compile (Direct3D 9Ex restriction)
WRITE(p, "\tocol1 = float4(prev.a, prev.a, prev.a, 0.0f);\n");
}
else
{
// Colors will be blended against the alpha from ocol1...
WRITE(p, "\tocol1 = prev;\n");
}
// ...and the alpha from ocol0 will be written to the framebuffer.
WRITE(p, "\tocol0.a = " I_ALPHA"[0].a;\n");
}

View File

@ -661,49 +661,73 @@ void Renderer::SetBlendMode(bool forceUpdate)
// Our render target always uses an alpha channel, so we need to override the blend functions to assume a destination alpha of 1 if the render target isn't supposed to have an alpha channel
// Example: D3DBLEND_DESTALPHA needs to be D3DBLEND_ONE since the result without an alpha channel is assumed to always be 1.
bool target_has_alpha = bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate && target_has_alpha;
bool useDualSource = useDstAlpha && g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
const D3DBLEND d3dSrcFactors[8] =
{
D3DBLEND_ZERO,
D3DBLEND_ONE,
D3DBLEND_DESTCOLOR,
D3DBLEND_INVDESTCOLOR,
D3DBLEND_SRCALPHA,
D3DBLEND_INVSRCALPHA,
(target_has_alpha) ? D3DBLEND_DESTALPHA : D3DBLEND_ONE,
(target_has_alpha) ? D3DBLEND_INVDESTALPHA : D3DBLEND_ZERO
};
{
D3DBLEND_ZERO,
D3DBLEND_ONE,
D3DBLEND_DESTCOLOR,
D3DBLEND_INVDESTCOLOR,
(useDualSource) ? D3DBLEND_SRCCOLOR2 : D3DBLEND_SRCALPHA,
(useDualSource) ? D3DBLEND_INVSRCCOLOR2 : D3DBLEND_INVSRCALPHA,
(target_has_alpha) ? D3DBLEND_DESTALPHA : D3DBLEND_ONE,
(target_has_alpha) ? D3DBLEND_INVDESTALPHA : D3DBLEND_ZERO
};
const D3DBLEND d3dDestFactors[8] =
{
D3DBLEND_ZERO,
D3DBLEND_ONE,
D3DBLEND_SRCCOLOR,
D3DBLEND_INVSRCCOLOR,
D3DBLEND_SRCALPHA,
D3DBLEND_INVSRCALPHA,
(target_has_alpha) ? D3DBLEND_DESTALPHA : D3DBLEND_ONE,
(target_has_alpha) ? D3DBLEND_INVDESTALPHA : D3DBLEND_ZERO
};
D3DBLEND_SRCCOLOR,
D3DBLEND_INVSRCCOLOR,
(useDualSource) ? D3DBLEND_SRCCOLOR2 : D3DBLEND_SRCALPHA,
(useDualSource) ? D3DBLEND_INVSRCCOLOR2 : D3DBLEND_INVSRCALPHA,
(target_has_alpha) ? D3DBLEND_DESTALPHA : D3DBLEND_ONE,
(target_has_alpha) ? D3DBLEND_INVDESTALPHA : D3DBLEND_ZERO
};
if (bpmem.blendmode.logicopenable && !forceUpdate)
{
D3D::SetRenderState(D3DRS_SEPARATEALPHABLENDENABLE , false);
return;
}
if (bpmem.blendmode.subtract && bpmem.blendmode.blendenable)
{
D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, true);
D3D::SetRenderState(D3DRS_BLENDOP, D3DBLENDOP_REVSUBTRACT);
D3D::SetRenderState(D3DRS_SRCBLEND, D3DBLEND_ONE);
D3D::SetRenderState(D3DRS_DESTBLEND, D3DBLEND_ONE);
}
else
{
D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, bpmem.blendmode.blendenable);
if (bpmem.blendmode.blendenable)
{
D3D::SetRenderState(D3DRS_BLENDOP, D3DBLENDOP_ADD);
D3D::SetRenderState(D3DRS_SRCBLEND, d3dSrcFactors[bpmem.blendmode.srcfactor]);
D3D::SetRenderState(D3DRS_DESTBLEND, d3dDestFactors[bpmem.blendmode.dstfactor]);
bool BlendEnable = bpmem.blendmode.subtract || bpmem.blendmode.blendenable;
D3D::SetRenderState(D3DRS_ALPHABLENDENABLE, BlendEnable);
D3D::SetRenderState(D3DRS_SEPARATEALPHABLENDENABLE , BlendEnable);
if (BlendEnable)
{
D3DBLENDOP op = D3DBLENDOP_ADD;
u32 srcidx = bpmem.blendmode.srcfactor;
u32 dstidx = bpmem.blendmode.dstfactor;
if (bpmem.blendmode.subtract)
{
op = D3DBLENDOP_REVSUBTRACT;
srcidx = GX_BL_ONE;
dstidx = GX_BL_ONE;
}
D3D::SetRenderState(D3DRS_BLENDOP, op);
D3D::SetRenderState(D3DRS_SRCBLEND, d3dSrcFactors[srcidx]);
D3D::SetRenderState(D3DRS_DESTBLEND, d3dDestFactors[dstidx]);
if (useDualSource)
{
op = D3DBLENDOP_ADD;
srcidx = GX_BL_ONE;
dstidx = GX_BL_ZERO;
}
}
else
{
// we can't use D3DBLEND_DESTCOLOR or D3DBLEND_INVDESTCOLOR for source in alpha channel so use their alpha equivalent instead
if (srcidx == GX_BL_DSTCLR) srcidx = GX_BL_DSTALPHA;
if (srcidx == GX_BL_INVDSTCLR) srcidx = GX_BL_INVDSTALPHA;
// we can't use D3DBLEND_SRCCOLOR or D3DBLEND_INVSRCCOLOR for destination in alpha channel so use their alpha equivalent instead
if (dstidx == GX_BL_SRCCLR) dstidx = GX_BL_SRCALPHA;
if (dstidx == GX_BL_INVSRCCLR) dstidx = GX_BL_INVSRCALPHA;
}
D3D::SetRenderState(D3DRS_BLENDOPALPHA, op);
D3D::SetRenderState(D3DRS_SRCBLENDALPHA, d3dSrcFactors[srcidx]);
D3D::SetRenderState(D3DRS_DESTBLENDALPHA, d3dDestFactors[dstidx]);
}
}
bool Renderer::SaveScreenshot(const std::string &filename, const TargetRectangle &dst_rect)

View File

@ -361,7 +361,12 @@ void VertexManager::vFlush()
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
u32 stride = g_nativeVertexFmt->GetVertexStride();
if (!PixelShaderCache::SetShader(DSTALPHA_NONE,g_nativeVertexFmt->m_components))
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;
bool useDualSource = useDstAlpha && g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
DSTALPHA_MODE AlphaMode = useDualSource ? DSTALPHA_DUAL_SOURCE_BLEND : DSTALPHA_NONE;
if (!PixelShaderCache::SetShader(AlphaMode ,g_nativeVertexFmt->m_components))
{
GFX_DEBUGGER_PAUSE_LOG_AT(NEXT_ERROR,true,{printf("Fail to set pixel shader\n");});
goto shader_fail;
@ -383,9 +388,7 @@ void VertexManager::vFlush()
DrawVertexArray(stride);
}
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;
if (useDstAlpha)
if (useDstAlpha && !useDualSource)
{
if (!PixelShaderCache::SetShader(DSTALPHA_ALPHA_PASS, g_nativeVertexFmt->m_components))
{

View File

@ -111,8 +111,7 @@ void VertexShaderCache::Init()
"{\n"
"VSOUTPUT OUT;"
"OUT.vPosition = inPosition;\n"
// HACK: Scale the texture coordinate range from (0,width) to (0,width-1), otherwise the linear filter won't average our samples correctly
"OUT.vTexCoord = inTEX0 * (float2(1.f,1.f) / inInvTexSize - float2(1.f,1.f)) * inInvTexSize;\n"
"OUT.vTexCoord = inTEX0;\n"
"OUT.vTexCoord1 = inTEX2;\n"
"return OUT;\n"
"}\n");

View File

@ -89,7 +89,7 @@ std::string VideoBackend::GetName()
std::string VideoBackend::GetDisplayName()
{
return "Direct3D9 (deprecated)";
return "Direct3D9";
}
void InitBackendInfo()
@ -97,11 +97,24 @@ void InitBackendInfo()
DX9::D3D::Init();
const int shaderModel = ((DX9::D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF);
const int maxConstants = (shaderModel < 3) ? 32 : ((shaderModel < 4) ? 224 : 65536);
g_Config.backend_info.APIType = shaderModel < 3 ? API_D3D9_SM20 :API_D3D9_SM30;
g_Config.backend_info.APIType = shaderModel < 3 ? API_D3D9_SM20 : API_D3D9_SM30;
g_Config.backend_info.bUseRGBATextures = false;
g_Config.backend_info.bUseMinimalMipCount = true;
g_Config.backend_info.bSupports3DVision = true;
g_Config.backend_info.bSupportsDualSourceBlend = false;
OSVERSIONINFO info;
ZeroMemory(&info, sizeof(OSVERSIONINFO));
info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
if (GetVersionEx(&info))
{
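// Dual source blending is only supported on Windows 7 or newer (sorry, XP users).
// NOTE(review): the check below accepts any NT major version > 5, which also includes Windows Vista (6.0) - confirm this is intended.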
g_Config.backend_info.bSupportsDualSourceBlend = info.dwPlatformId == VER_PLATFORM_WIN32_NT && info.dwMajorVersion > 5;
}
else
{
g_Config.backend_info.bSupportsDualSourceBlend = false;
}
g_Config.backend_info.bSupportsFormatReinterpretation = true;
g_Config.backend_info.bSupportsPixelLighting = C_PLIGHTS + 40 <= maxConstants && C_PMATERIALS + 4 <= maxConstants;