Get shader color write masking going on all backends.

This commit is contained in:
Henrik Rydgård 2020-11-08 23:17:06 +01:00
parent 7632c12189
commit 6310af25fa
22 changed files with 325 additions and 165 deletions

View File

@ -1001,6 +1001,32 @@ void GLQueueRunner::PerformRenderPass(const GLRStep &step, bool first, bool last
CHECK_GL_ERROR_IF_DEBUG();
break;
}
case GLRRenderCommand::UNIFORM4UI:
{
	// Uploads 1-4 unsigned integer components to a uniform of the currently bound program.
	_dbg_assert_(curProgram);
	// A uniform name, when present, takes priority over the cached location pointer.
	int location = -1;
	if (c.uniform4.name) {
		location = curProgram->GetUniformLoc(c.uniform4.name);
	} else if (c.uniform4.loc) {
		location = *c.uniform4.loc;
	}
	// Negative means the uniform could not be resolved, so there is nothing to upload.
	if (location >= 0) {
		// The payload storage is shared between the uniform commands; reinterpret it as unsigned ints.
		const GLuint *values = (const GLuint *)c.uniform4.v;
		switch (c.uniform4.count) {
		case 1: glUniform1uiv(location, 1, values); break;
		case 2: glUniform2uiv(location, 1, values); break;
		case 3: glUniform3uiv(location, 1, values); break;
		case 4: glUniform4uiv(location, 1, values); break;
		}
	}
	CHECK_GL_ERROR_IF_DEBUG();
	break;
}
case GLRRenderCommand::UNIFORM4I:
{
_dbg_assert_(curProgram);

View File

@ -42,6 +42,7 @@ enum class GLRRenderCommand : uint8_t {
BLENDCOLOR,
LOGICOP,
UNIFORM4I,
UNIFORM4UI,
UNIFORM4F,
UNIFORMMATRIX,
TEXTURESAMPLER,

View File

@ -659,6 +659,30 @@ public:
curRenderStep_->commands.push_back(data);
}
// Queues an upload of `count` unsigned integer components to the uniform whose
// location is stored at `loc`. The values are copied into the command, so `udata`
// does not need to outlive this call.
//   loc   - pointer to the uniform location; it is dereferenced when the command is executed.
//   count - number of components; the queue runner's UNIFORM4UI handler only dispatches 1-4.
//   udata - pointer to `count` values.
void SetUniformUI(const GLint *loc, int count, const uint32_t *udata) {
	_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
	// Catch bad callers before memcpy: only 1-4 components are handled when the command runs.
	_dbg_assert_(udata && count >= 1 && count <= 4);
#ifdef _DEBUG
	_dbg_assert_(curProgram_);
#endif
	GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
	// The queue runner checks `name` before falling back to `loc`, so make the
	// "no name" state explicit rather than relying on how the command was zeroed.
	data.uniform4.name = nullptr;
	data.uniform4.loc = loc;
	data.uniform4.count = count;
	memcpy(data.uniform4.v, udata, sizeof(uint32_t) * count);
	curRenderStep_->commands.push_back(data);
}
void SetUniformUI1(const GLint *loc, uint32_t udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG
_dbg_assert_(curProgram_);
#endif
GLRRenderData data{ GLRRenderCommand::UNIFORM4UI };
data.uniform4.loc = loc;
data.uniform4.count = 1;
memcpy(data.uniform4.v, &udata, sizeof(udata));
curRenderStep_->commands.push_back(data);
}
void SetUniformF(const GLint *loc, int count, const float *udata) {
_dbg_assert_(curRenderStep_ && curRenderStep_->stepType == GLRStepType::RENDER);
#ifdef _DEBUG

View File

@ -23,6 +23,7 @@ const char *hlsl_preamble_fs =
"#define vec3 float3\n"
"#define vec4 float4\n"
"#define uvec3 uint3\n"
"#define uvec4 uint4\n"
"#define ivec3 int3\n"
"#define ivec4 int4\n"
"#define mat4 float4x4\n"

View File

@ -1543,7 +1543,8 @@ void VKContext::BindFramebufferAsRenderTarget(Framebuffer *fbo, const RenderPass
void VKContext::BindFramebufferAsTexture(Framebuffer *fbo, int binding, FBChannel channelBit, int attachment) {
VKFramebuffer *fb = (VKFramebuffer *)fbo;
// TODO: There are cases where this is okay, actually.
// TODO: There are cases where this is okay, actually. But requires layout transitions and stuff -
// we're not ready for this.
_assert_(fb != curFramebuffer_);
int aspect = 0;

View File

@ -80,6 +80,12 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool doFlatShading = id.Bit(FS_BIT_FLATSHADE);
bool shaderDepal = id.Bit(FS_BIT_SHADER_DEPAL);
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK);
if (colorWriteMask && !compat.bitwiseOps) {
*errorString = "Color Write Mask requires bitwise ops";
return false;
}
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
@ -104,7 +110,13 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool earlyFragmentTests = ((!enableAlphaTest && !enableColorTest) || testForceToZero) && !gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
bool useAdrenoBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);
bool readFramebufferTex = replaceBlend == REPLACE_BLEND_COPY_FBO && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool readFramebuffer = replaceBlend == REPLACE_BLEND_COPY_FBO || colorWriteMask;
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
if (readFramebuffer && compat.shaderLanguage == HLSL_D3D9) {
*errorString = "Framebuffer read not yet supported in HLSL D3D9";
return false;
}
if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
if (earlyFragmentTests) {
@ -188,11 +200,9 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
} else {
WRITE(p, "SamplerState samp : register(s0);\n");
WRITE(p, "Texture2D<vec4> tex : register(t0);\n");
if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
// No sampler required, we Load
WRITE(p, "Texture2D<vec4> fboTex : register(t1);\n");
}
if (readFramebufferTex) {
// No sampler required, we Load
WRITE(p, "Texture2D<vec4> fboTex : register(t1);\n");
}
WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
}
@ -207,7 +217,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
if (enableColorTest) {
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return uvec3(floor(x * 255.0f + 0.5f)); }\n");
WRITE(p, "uvec3 roundAndScaleTo255iv(float3 x) { return (floor(x * 255.0f + 0.5f)); }\n");
} else {
WRITE(p, "vec3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n");
}
@ -225,7 +235,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (enableFog) {
WRITE(p, " float v_fogdepth: TEXCOORD1;\n");
}
if (compat.shaderLanguage == HLSL_D3D11 && ((replaceBlend == REPLACE_BLEND_COPY_FBO) || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT))) {
if (compat.shaderLanguage == HLSL_D3D11 && readFramebuffer) {
WRITE(p, " vec4 pixelPos : SV_POSITION;\n");
}
WRITE(p, "};\n");
@ -286,14 +296,15 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (doTexture)
WRITE(p, "uniform sampler2D tex;\n");
if (readFramebufferTex) {
if (!compat.texelFetch) {
WRITE(p, "uniform vec2 u_fbotexSize;\n");
}
WRITE(p, "uniform sampler2D fbotex;\n");
}
if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
*uniformMask |= DIRTY_SHADERBLEND;
if (readFramebufferTex) {
if (!compat.texelFetch) {
WRITE(p, "uniform vec2 u_fbotexSize;\n");
}
WRITE(p, "uniform sampler2D fbotex;\n");
}
if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
WRITE(p, "uniform vec3 u_blendFixA;\n");
}
@ -329,6 +340,11 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
*uniformMask |= DIRTY_DEPAL;
}
if (colorWriteMask) {
WRITE(p, "uniform uint u_colorWriteMask;\n");
*uniformMask |= DIRTY_COLORWRITEMASK;
}
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
*uniformMask |= DIRTY_STENCILREPLACEVALUE;
WRITE(p, "uniform float u_stencilReplaceValue;\n");
@ -387,6 +403,20 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
}
// Provide implementations of packUnorm4x8 and unpackUnorm4x8 if not available.
if (colorWriteMask && compat.shaderLanguage == HLSL_D3D11 || (compat.shaderLanguage == GLSL_3xx && compat.glslVersionNumber < 400)) {
WRITE(p, "uint packUnorm4x8(vec4 v) {\n");
WRITE(p, " v = clamp(v, 0.0, 1.0);\n");
WRITE(p, " uvec4 u = uvec4(255.0 * v);\n");
WRITE(p, " return u.x | (u.y << 8) | (u.z << 16) | (u.w << 24);\n");
WRITE(p, "}\n");
WRITE(p, "vec4 unpackUnorm4x8(uint x) {\n");
WRITE(p, " uvec4 u = uvec4(x & 0xFFU, (x >> 8) & 0xFFU, (x >> 16) & 0xFFU, (x >> 24) & 0xFFU);\n");
WRITE(p, " return vec4(u) * (1.0 / 255.0);\n");
WRITE(p, "}\n");
}
// PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) {
WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n");
@ -416,6 +446,21 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
if (isModeClear) {
// Clear mode does not allow any fancy shading.
WRITE(p, " vec4 v = v_color0;\n");
// Masking with clear mode is ok, I think?
if (readFramebuffer) {
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, " vec4 destColor = fboTex.Load(int3((int)In.pixelPos.x, (int)In.pixelPos.y, 0));\n");
} else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (!compat.texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else {
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
}
} else {
const char *secondary = "";
// Secondary color for specular on top of texture
@ -799,19 +844,22 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor);
}
if (replaceBlend == REPLACE_BLEND_COPY_FBO && compat.shaderLanguage != HLSL_D3D9) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
// Two things read from the old framebuffer - shader replacement blending and bit-level masking.
if (readFramebuffer) {
if (compat.shaderLanguage == HLSL_D3D11) {
WRITE(p, " vec4 destColor = fboTex.Load(int3((int)In.pixelPos.x, (int)In.pixelPos.y, 0));\n");
} else if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH)) {
// If we have NV_shader_framebuffer_fetch / EXT_shader_framebuffer_fetch, we skip the blit.
// We can just read the prev value more directly.
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
} else if (!compat.texelFetch) {
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
} else {
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
}
}
if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
const char *srcFactor = nullptr;
const char *dstFactor = nullptr;
@ -927,6 +975,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
return false;
}
// TODO: This could support more ops using the shader blending mechanism.
LogicOpReplaceType replaceLogicOpType = (LogicOpReplaceType)id.Bits(FS_BIT_REPLACE_LOGIC_OP_TYPE, 2);
switch (replaceLogicOpType) {
case LOGICOPTYPE_ONE:
@ -943,6 +992,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
return false;
}
// Final color computed - apply color write mask.
// TODO: Maybe optimize to only do math on the affected channels?
// Or .. meh.
if (colorWriteMask) {
WRITE(p, " highp uint v32 = packUnorm4x8(v);\n");
WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n");
// Note that the mask has been flipped to the PC way - 1 means write.
WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n");
WRITE(p, " v = unpackUnorm4x8(v32);\n");
}
if (gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
const double scale = DepthSliceFactor() * 65535.0;

View File

@ -956,8 +956,68 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
}
}
// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
// Returns true when the current PSP color/alpha write mask has at least one channel
// that is neither fully masked (0x00) nor fully open (0xFF), AND in-shader masking
// is permitted. Returns false otherwise.
bool IsColorWriteMaskComplex(bool allowFramebufferRead) {
	// Restrict to Outrun temporarily (by uglily reusing the ReinterpretFramebuffers flag)
	const bool shaderMaskingPermitted = allowFramebufferRead && PSP_CoreParameter().compat.flags().ReinterpretFramebuffers;
	if (!shaderMaskingPermitted) {
		// Don't have a choice - we'll make do but it won't always be right.
		return false;
	}

	// Pack RGB (low 24 bits) and alpha (top 8 bits) into one word, then test a byte at a time.
	const uint32_t packedMask = (gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24);
	for (int shift = 0; shift < 32; shift += 8) {
		const uint32_t channelBits = (packedMask >> shift) & 0xFF;
		if (channelBits != 0x00 && channelBits != 0xFF) {
			return true;
		}
	}
	return false;
}
// If we can emulate the colorMask by simply toggling the full R G B A masks offered
// by modern hardware, we do that. This is 99.9% of the time.
// When that's not enough, we fall back on a technique similar to shader blending,
// we read from the framebuffer (or a copy of it).
//
// maskState            - output; every field is written.
// allowFramebufferRead - whether the caller can provide the destination color to the shader.
void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
	// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
	uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));

	// Publish the per-bit mask (PC convention) so this field is never left indeterminate.
	maskState.uniformMask = colorMask;
	maskState.applyFramebufferRead = false;

	// The fragment shader only contains masking code when IsColorWriteMaskComplex() says so
	// (that is what the shader ID is built from). Use the same predicate here, so we never
	// request a framebuffer read and open up a channel that the shader won't actually mask.
	const bool shaderMaskAvailable = IsColorWriteMaskComplex(allowFramebufferRead);

	for (int i = 0; i < 4; i++) {
		const uint32_t channelMask = colorMask & 0xFF;
		switch (channelMask) {
		case 0x0:
			maskState.rgba[i] = false;
			break;
		case 0xFF:
			maskState.rgba[i] = true;
			break;
		default:
			if (shaderMaskAvailable) {
				// Partial mask: write the whole channel and let the shader merge in the old bits.
				maskState.applyFramebufferRead = true;
				maskState.rgba[i] = true;
			} else {
				// Use the old heuristic.
				maskState.rgba[i] = channelMask >= 128;
			}
			break;
		}
		colorMask >>= 8;
	}

	// Let's not write to alpha if stencil isn't enabled.
	if (IsStencilTestOutputDisabled()) {
		maskState.rgba[3] = false;
	} else if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
		// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
		maskState.rgba[3] = false;
	}
}
// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead) {
// Blending is a bit complex to emulate. This is due to several reasons:
//

View File

@ -1,5 +1,6 @@
#pragma once
#include <cstdint>
#include "Common/CommonTypes.h"
#include "GPU/ge_constants.h"
@ -25,12 +26,18 @@ enum ReplaceAlphaType {
};
enum ReplaceBlendType {
REPLACE_BLEND_NO,
REPLACE_BLEND_NO, // Blend function handled directly with blend states.
REPLACE_BLEND_STANDARD,
// SRC part of blend function handled in-shader.
REPLACE_BLEND_PRE_SRC,
REPLACE_BLEND_PRE_SRC_2X_ALPHA,
REPLACE_BLEND_2X_ALPHA,
REPLACE_BLEND_2X_SRC,
// Full blend equation runs in shader.
// We might have to make a copy of the framebuffer target to read from.
REPLACE_BLEND_COPY_FBO,
};
@ -47,6 +54,9 @@ bool IsAlphaTestAgainstZero();
bool NeedsTestDiscard();
bool IsStencilTestOutputDisabled();
// If not, we have to emulate it in the shader, similar to blend replace.
bool IsColorMaskSimple(uint32_t colorMask);
StencilValueType ReplaceAlphaWithStencilType();
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);
ReplaceBlendType ReplaceBlendWithShader(bool allowShaderBlend, GEBufferFormat bufferFormat);
@ -160,6 +170,15 @@ struct GenericBlendState {
void ConvertBlendState(GenericBlendState &blendState, bool allowShaderBlend);
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState);
// Backend-independent description of the color write mask, filled in by ConvertMaskState().
// All members have defaults so a freshly declared instance never holds indeterminate values.
struct GenericMaskState {
	// True when at least one channel has a partial (per-bit) mask that must be
	// resolved in the fragment shader by reading the destination color.
	bool applyFramebufferRead = false;
	uint32_t uniformMask = 0;  // For each bit, opposite to the PSP.
	bool rgba[4] = {};  // true = draw, false = don't draw this channel
};
void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead);
bool IsColorWriteMaskComplex(bool allowFramebufferRead);
struct GenericStencilFuncState {
bool enabled;
GEComparison testFunc;

View File

@ -41,7 +41,7 @@ enum DebugShaderStringType {
};
// Shared between the backends. Not all are necessarily used by each backend, but this lets us share
// more code than before.
// more code than before. TODO: Can probably cut the number of these down without too much slowdown.
enum : uint64_t {
DIRTY_PROJMATRIX = 1ULL << 0,
DIRTY_PROJTHROUGHMATRIX = 1ULL << 1,
@ -86,12 +86,13 @@ enum : uint64_t {
DIRTY_CULLRANGE = 1ULL << 34,
DIRTY_DEPAL = 1ULL << 35,
DIRTY_COLORWRITEMASK = 1ULL << 36,
// space for 5 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
// space for 4 more uniform dirty flags. Remember to update DIRTY_ALL_UNIFORMS.
DIRTY_BONE_UNIFORMS = 0xFF000000ULL,
DIRTY_ALL_UNIFORMS = 0xFFFFFFFFFULL,
DIRTY_ALL_UNIFORMS = 0x1FFFFFFFFFULL,
DIRTY_ALL_LIGHTS = DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3,
// Other dirty elements that aren't uniforms!

View File

@ -171,6 +171,7 @@ std::string FragmentShaderDesc(const FShaderID &id) {
if (id.Bit(FS_BIT_FLATSHADE)) desc << "Flat ";
if (id.Bit(FS_BIT_BGRA_TEXTURE)) desc << "BGRA ";
if (id.Bit(FS_BIT_SHADER_DEPAL)) desc << "Depal ";
if (id.Bit(FS_BIT_COLOR_WRITEMASK)) desc << "WriteMask ";
if (id.Bit(FS_BIT_SHADER_TEX_CLAMP)) {
desc << "TClamp";
if (id.Bit(FS_BIT_CLAMP_S)) desc << "S";
@ -239,6 +240,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doTextureAlpha = gstate.isTextureAlphaUsed();
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);
// Note how we here recompute some of the work already done in state mapping.
// Not ideal! At least we share the code.
@ -309,6 +311,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
id.SetBit(FS_BIT_FLATSHADE, doFlatShading);
id.SetBit(FS_BIT_SHADER_DEPAL, useShaderDepal);
id.SetBit(FS_BIT_COLOR_WRITEMASK, colorWriteMask);
if (g_Config.bVendorBugChecksEnabled) {
if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL)) {

View File

@ -92,6 +92,7 @@ enum FShaderBit : uint8_t {
FS_BIT_BGRA_TEXTURE = 47,
FS_BIT_TEST_DISCARD_TO_ZERO = 48,
FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL = 49,
FS_BIT_COLOR_WRITEMASK = 50,
};
static inline FShaderBit operator +(FShaderBit bit, int i) {

View File

@ -203,6 +203,10 @@ void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipView
Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
}
if (dirtyUniforms & DIRTY_COLORWRITEMASK) {
ub->colorWriteMask = ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF));
}
// Texturing
if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
const float invW = 1.0f / (float)gstate_c.curTextureWidth;

View File

@ -10,7 +10,7 @@ enum : uint64_t {
DIRTY_BASE_UNIFORMS =
DIRTY_WORLDMATRIX | DIRTY_PROJTHROUGHMATRIX | DIRTY_VIEWMATRIX | DIRTY_TEXMATRIX | DIRTY_ALPHACOLORREF |
DIRTY_PROJMATRIX | DIRTY_FOGCOLOR | DIRTY_FOGCOEF | DIRTY_TEXENV | DIRTY_STENCILREPLACEVALUE |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_ALPHACOLORMASK | DIRTY_SHADERBLEND | DIRTY_COLORWRITEMASK | DIRTY_UVSCALEOFFSET | DIRTY_TEXCLAMP | DIRTY_DEPTHRANGE | DIRTY_MATAMBIENTALPHA |
DIRTY_BEZIERSPLINE | DIRTY_DEPAL,
DIRTY_LIGHT_UNIFORMS =
DIRTY_LIGHT0 | DIRTY_LIGHT1 | DIRTY_LIGHT2 | DIRTY_LIGHT3 |
@ -30,10 +30,10 @@ struct UB_VS_FS_Base {
float depthRange[4];
float fogCoef[2]; float stencil; float pad0;
float matAmbient[4];
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
int pad2; int pad3;
float cullRangeMin[4];
float cullRangeMax[4];
uint32_t spline_counts; uint32_t depal_mask_shift_off_fmt; // 4 params packed into one.
uint32_t colorWriteMask; int pad3;
// Fragment data
float fogColor[4];
float texEnvColor[4];
@ -56,17 +56,17 @@ R"( mat4 u_proj;
vec2 u_fogcoef;
float u_stencilReplaceValue;
vec4 u_matambientalpha;
uint u_spline_counts;
uint u_depal_mask_shift_off_fmt;
int u_pad2;
int u_pad3;
vec4 u_cullRangeMin;
vec4 u_cullRangeMax;
uint u_spline_counts;
uint u_depal_mask_shift_off_fmt;
uint u_colorWriteMask;
int u_pad3;
vec3 u_fogcolor;
vec3 u_texenv;
ivec4 u_alphacolorref;
ivec4 u_alphacolormask;
vec3 u_blendFixA;
vec3 u_blendFixA; uint colorWriteMask;
vec3 u_blendFixB;
vec4 u_texclamp;
vec2 u_texclampoff;

View File

@ -158,6 +158,10 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
// Set blend - unless we need to do it in the shader.
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead);
GenericMaskState maskState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead) {
if (ApplyFramebufferRead(&fboTexNeedsBind_)) {
// The shader takes over the responsibility for blending, so recompute.
@ -170,6 +174,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
} else if (blendState.resetFramebufferRead) {
ResetFramebufferRead();
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
if (blendState.enabled) {
@ -204,38 +209,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}
}
// PSP color/alpha mask is per bit but we can only support per byte.
// But let's do that, at least. And let's try a threshold.
bool rmask = (gstate.pmskc & 0xFF) < 128;
bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
bool amask = (gstate.pmska & 0xFF) < 128;
#ifndef MOBILE_DEVICE
u8 abits = (gstate.pmska >> 0) & 0xFF;
u8 rbits = (gstate.pmskc >> 0) & 0xFF;
u8 gbits = (gstate.pmskc >> 8) & 0xFF;
u8 bbits = (gstate.pmskc >> 16) & 0xFF;
if ((rbits != 0 && rbits != 0xFF) || (gbits != 0 && gbits != 0xFF) || (bbits != 0 && bbits != 0xFF)) {
WARN_LOG_REPORT_ONCE(rgbmask, G3D, "Unsupported RGB mask: r=%02x g=%02x b=%02x", rbits, gbits, bbits);
}
if (abits != 0 && abits != 0xFF) {
// The stencil part of the mask is supported.
WARN_LOG_REPORT_ONCE(amask, G3D, "Unsupported alpha/stencil mask: %02x", abits);
}
#endif
// Let's not write to alpha if stencil isn't enabled.
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
amask = false;
}
}
keys_.blend.colorWriteMask = (rmask ? 1 : 0) | (gmask ? 2 : 0) | (bmask ? 4 : 0) | (amask ? 8 : 0);
keys_.blend.colorWriteMask = (maskState.rgba[0] ? 1 : 0) | (maskState.rgba[1] ? 2 : 0) | (maskState.rgba[2] ? 4 : 0) | (maskState.rgba[3] ? 8 : 0);
}
if (!device1_) {

View File

@ -129,7 +129,10 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead) {
GenericMaskState maskState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
if (ApplyFramebufferRead(&fboTexNeedsBind_)) {
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
@ -160,24 +163,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
dxstate.blend.disable();
}
// PSP color/alpha mask is per bit but we can only support per byte.
// But let's do that, at least. And let's try a threshold.
bool rmask = (gstate.pmskc & 0xFF) < 128;
bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
bool amask = (gstate.pmska & 0xFF) < 128;
// Let's not write to alpha if stencil isn't enabled.
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
amask = false;
}
}
dxstate.colorMask.set(rmask, gmask, bmask, amask);
dxstate.colorMask.set(maskState.rgba[0], maskState.rgba[1], maskState.rgba[2], maskState.rgba[3]);
}
}

View File

@ -102,6 +102,7 @@ LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs,
queries.push_back({ &u_fogcoef, "u_fogcoef" });
queries.push_back({ &u_alphacolorref, "u_alphacolorref" });
queries.push_back({ &u_alphacolormask, "u_alphacolormask" });
queries.push_back({ &u_colorWriteMask, "u_colorWriteMask" });
queries.push_back({ &u_stencilReplaceValue, "u_stencilReplaceValue" });
queries.push_back({ &u_testtex, "testtex" });
@ -377,6 +378,9 @@ void LinkedShader::UpdateUniforms(u32 vertType, const ShaderID &vsid, bool useBu
if (dirty & DIRTY_ALPHACOLORMASK) {
SetColorUniform3iAlpha(render_, &u_alphacolormask, gstate.colortestmask, gstate.getAlphaTestMask());
}
if (dirty & DIRTY_COLORWRITEMASK) {
render_->SetUniformUI1(&u_colorWriteMask, ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF)));
}
if (dirty & DIRTY_FOGCOLOR) {
SetColorUniform3(render_, &u_fogcolor, gstate.fogcolor);
}

View File

@ -80,6 +80,7 @@ public:
// Fragment processing inputs
int u_alphacolorref;
int u_alphacolormask;
int u_colorWriteMask;
int u_testtex;
int u_fogcolor;
int u_fogcoef;

View File

@ -24,6 +24,7 @@
#include "Common/Profiler/Profiler.h"
#include "Common/GPU/OpenGL/GLDebugLog.h"
#include "Common/GPU/OpenGL/GLRenderManager.h"
#include "Common/Data/Convert/SmallDataConvert.h"
#include "GPU/Math3D.h"
#include "GPU/GPUState.h"
@ -140,18 +141,6 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
// Start profiling here to skip SetTexture which is already accounted for
PROFILE_THIS_SCOPE("applydrawstate");
// amask is needed for both stencil and blend state so we keep it outside for now
bool amask = (gstate.pmska & 0xFF) < 128;
// Let's not write to alpha if stencil isn't enabled.
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
amask = false;
}
}
bool useBufferedRendering = framebufferManager_->UseBufferedRendering();
if (gstate_c.IsDirty(DIRTY_BLEND_STATE)) {
@ -169,7 +158,10 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead) {
GenericMaskState maskState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
if (ApplyFramebufferRead(&fboTexNeedsBind_)) {
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
@ -177,7 +169,6 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
// We copy the framebuffer here, as doing so will wipe any blend state if we do it later.
if (fboTexNeedsBind_) {
// Note that this is positions, not UVs, that we need the copy from.
// TODO: If the device doesn't support blit, this will corrupt the currently applied texture.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
renderManager->SetTextureSampler(1, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_LINEAR, GL_LINEAR, 0.0f);
@ -205,23 +196,13 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
}
if (blendState.useBlendColor) {
uint32_t color = blendState.blendColor;
const float col[4] = {
(float)((color & 0xFF) >> 0) * (1.0f / 255.0f),
(float)((color & 0xFF00) >> 8) * (1.0f / 255.0f),
(float)((color & 0xFF0000) >> 16) * (1.0f / 255.0f),
(float)((color & 0xFF000000) >> 24) * (1.0f / 255.0f),
};
float col[4];
Uint8x4ToFloat4(col, color);
renderManager->SetBlendFactor(col);
}
}
// PSP color/alpha mask is per bit but we can only support per byte.
// But let's do that, at least. And let's try a threshold.
bool rmask = (gstate.pmskc & 0xFF) < 128;
bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
int mask = (int)rmask | ((int)gmask << 1) | ((int)bmask << 2) | ((int)amask << 3);
int mask = (int)maskState.rgba[0] | ((int)maskState.rgba[1] << 1) | ((int)maskState.rgba[2] << 2) | ((int)maskState.rgba[3] << 3);
if (blendState.enabled) {
renderManager->SetBlendAndMask(mask, blendState.enabled,
glBlendFactorLookup[(size_t)blendState.srcColor], glBlendFactorLookup[(size_t)blendState.dstColor],

View File

@ -120,8 +120,8 @@ const CommonCommandTableEntry commonCommandTable[] = {
{ GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE },
{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_COLORWRITEMASK },
{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE },
{ GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
{ GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },

View File

@ -165,7 +165,10 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead) {
GenericMaskState maskState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
if (ApplyFramebufferRead(&fboTexNeedsBind_)) {
// The shader takes over the responsibility for blending, so recompute.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
@ -178,6 +181,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
} else if (blendState.resetFramebufferRead) {
ResetFramebufferRead();
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
if (blendState.enabled) {
@ -206,24 +210,11 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
dynState.useBlendColor = false;
}
// PSP color/alpha mask is per bit but we can only support per byte.
// But let's do that, at least. And let's try a threshold.
bool rmask = (gstate.pmskc & 0xFF) < 128;
bool gmask = ((gstate.pmskc >> 8) & 0xFF) < 128;
bool bmask = ((gstate.pmskc >> 16) & 0xFF) < 128;
bool amask = (gstate.pmska & 0xFF) < 128;
// Let's not write to alpha if stencil isn't enabled.
if (IsStencilTestOutputDisabled()) {
amask = false;
} else {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
amask = false;
}
}
key.colorWriteMask = (rmask ? VK_COLOR_COMPONENT_R_BIT : 0) | (gmask ? VK_COLOR_COMPONENT_G_BIT : 0) | (bmask ? VK_COLOR_COMPONENT_B_BIT : 0) | (amask ? VK_COLOR_COMPONENT_A_BIT : 0);
key.colorWriteMask =
(maskState.rgba[0] ? VK_COLOR_COMPONENT_R_BIT : 0) |
(maskState.rgba[1] ? VK_COLOR_COMPONENT_G_BIT : 0) |
(maskState.rgba[2] ? VK_COLOR_COMPONENT_B_BIT : 0) |
(maskState.rgba[3] ? VK_COLOR_COMPONENT_A_BIT : 0);
// Workaround proposed in #10421, for bug where the color write mask is not applied correctly on Adreno.
if ((gstate.pmskc & 0x00FFFFFF) == 0x00FFFFFF && g_Config.bVendorBugChecksEnabled && draw_->GetBugs().Has(Draw::Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST)) {
@ -377,20 +368,16 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
}
void DrawEngineVulkan::BindShaderBlendTex() {
// At this point, we know if the vertices are full alpha or not.
// TODO: Set the nearest/linear here (since we correctly know if alpha/color tests are needed)?
// TODO: At this point, we know if the vertices are full alpha or not.
// Set the nearest/linear here (since we correctly know if alpha/color tests are needed)?
if (!gstate.isModeClear()) {
// TODO: Test texture?
if (fboTexNeedsBind_) {
// Note that this is positions, not UVs, that we need the copy from.
framebufferManager_->BindFramebufferAsColorTexture(1, framebufferManager_->GetCurrentRenderVFB(), BINDFBCOLOR_MAY_COPY);
// If we are rendering at a higher resolution, linear is probably best for the dest color.
boundSecondary_ = (VkImageView)draw_->GetNativeObject(Draw::NativeObject::BOUND_TEXTURE1_IMAGEVIEW);
fboTexBound_ = true;
fboTexNeedsBind_ = false;
}
}
}
void DrawEngineVulkan::ApplyDrawStateLate(VulkanRenderManager *renderManager, bool applyStencilRef, uint8_t stencilRef, bool useBlendConstant) {

View File

@ -796,9 +796,15 @@ NPUZ00043 = true
NPEZ00198 = true
# This setting will go away in the near future, hopefully we can enable it
# for all games.
# for all or most games.
[ReinterpretFramebuffers]
# Outrun - issue #11358
# Outrun 2006: Coast to Coast - issue #11358
ULES00262 = true
ULUS10064 = true
ULKS46087 = true
# Colin McRae's DiRT 2?
# ULUS10471 = true
# ULJM05533 = true
# NPJH50006 = true

View File

@ -181,7 +181,7 @@ bool TestReinterpretShaders() {
failed = true;
return false;
} else {
printf("===\n%s\n===\n", buffer);
//printf("===\n%s\n===\n", buffer);
}
}
}
@ -215,25 +215,16 @@ bool TestReinterpretShaders() {
return !failed;
}
const ShaderLanguage languages[] = {
ShaderLanguage::HLSL_D3D9,
ShaderLanguage::HLSL_D3D11,
ShaderLanguage::GLSL_VULKAN,
ShaderLanguage::GLSL_1xx,
ShaderLanguage::GLSL_3xx,
};
const int numLanguages = ARRAY_SIZE(languages);
bool TestShaderGenerators() {
LoadD3D11();
init_glslang();
LoadD3DCompilerDynamic();
if (!TestReinterpretShaders()) {
return false;
}
ShaderLanguage languages[] = {
ShaderLanguage::HLSL_D3D9,
ShaderLanguage::HLSL_D3D11,
ShaderLanguage::GLSL_VULKAN,
ShaderLanguage::GLSL_1xx,
ShaderLanguage::GLSL_3xx,
};
const int numLanguages = ARRAY_SIZE(languages);
bool TestVertexShaders() {
char *buffer[numLanguages];
for (int i = 0; i < numLanguages; i++) {
@ -290,8 +281,21 @@ bool TestShaderGenerators() {
printf("%d/%d vertex shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages);
successes = 0;
count = 200;
for (int i = 0; i < numLanguages; i++) {
delete[] buffer[i];
}
return true;
}
bool TestFragmentShaders() {
char *buffer[numLanguages];
for (int i = 0; i < numLanguages; i++) {
buffer[i] = new char[65536];
}
GMRng rng;
int successes = 0;
int count = 300;
// Generate a bunch of random fragment shader IDs, try to generate shader source.
// Then compile it and check that it's ok.
@ -337,15 +341,31 @@ bool TestShaderGenerators() {
printf("%d/%d fragment shaders generated (it's normal that it's not all, there are invalid bit combos)\n", successes, count * numLanguages);
successes = 0;
count = 200;
_CrtCheckMemory();
for (int i = 0; i < numLanguages; i++) {
delete[] buffer[i];
}
return true;
}
// Entry point for the shader generator tests. Loads the compilers, then runs the
// reinterpret, fragment and vertex shader suites in that order, stopping at the
// first one that fails. Returns true only if all three pass.
bool TestShaderGenerators() {
	LoadD3D11();
	init_glslang();
	LoadD3DCompilerDynamic();

	// && short-circuits, so a failing suite prevents the later ones from running.
	const bool allPassed = TestReinterpretShaders() && TestFragmentShaders() && TestVertexShaders();
	if (allPassed) {
		// The heap check is only reached when every suite succeeded.
		_CrtCheckMemory();
	}
	return allPassed;
}