Shader bit setup and code generation for logic-ops-in-shader

This commit is contained in:
Henrik Rydgård 2022-09-04 11:14:47 +02:00
parent 49d5b64479
commit 2474eb6a72
4 changed files with 45 additions and 15 deletions

View File

@ -108,11 +108,6 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
blueToAlpha = true;
}
GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
bool isModeClear = id.Bit(FS_BIT_CLEARMODE);
const char *shading = "";
@ -121,7 +116,16 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);
bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask;
GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
// Distinct from the logic op simulation support.
GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4);
bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY;
bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
@ -1078,16 +1082,37 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
return false;
}
// Final color computed - apply color write mask.
// TODO: Maybe optimize to only do math on the affected channels?
// Or .. meh. That would require more shader bits. Though we could
// of course optimize for the common mask 0xF00000, though again, blue-to-alpha
// does a better job with that.
if (colorWriteMask) {
// Final color computed - apply logic ops and bitwise color write mask, through shader blending, if specified.
if (colorWriteMask || replaceLogicOp) {
WRITE(p, " highp uint v32 = packUnorm4x8(%s);\n", compat.fragColor0);
WRITE(p, " highp uint d32 = packUnorm4x8(destColor);\n");
// Note that the mask has been flipped to the PC way - 1 means write.
WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n");
// v32 is both the "s" to the logical operation, and the value that we'll merge to the destination with masking later.
// d32 is the "d" to the logical operation.
// TODO: Do logical ops work on just RGB or also A on the PSP?
// Emit the shader statement implementing the selected logic op on the packed colors.
// v32 and d32 are declared as uint in the generated shader, so constant operands must
// carry the 'u' suffix: GLSL ES performs no implicit int -> uint conversion, and an
// unsuffixed literal would make the generated shader fail to compile there.
switch (replaceLogicOpType) {
case GE_LOGIC_CLEAR: p.C(" v32 = 0u;\n"); break;
case GE_LOGIC_AND: p.C(" v32 = v32 & d32;\n"); break;
case GE_LOGIC_AND_REVERSE: p.C(" v32 = v32 & ~d32;\n"); break;
case GE_LOGIC_COPY: break; // source to dest, do nothing. Will be set to this, if not used.
case GE_LOGIC_AND_INVERTED: p.C(" v32 = ~v32 & d32;\n"); break;
case GE_LOGIC_NOOP: p.C(" v32 = d32;\n"); break;
case GE_LOGIC_XOR: p.C(" v32 = v32 ^ d32;\n"); break;
case GE_LOGIC_OR: p.C(" v32 = v32 | d32;\n"); break;
case GE_LOGIC_NOR: p.C(" v32 = ~(v32 | d32);\n"); break;
case GE_LOGIC_EQUIV: p.C(" v32 = ~(v32 ^ d32);\n"); break;
case GE_LOGIC_INVERTED: p.C(" v32 = ~d32;\n"); break;
case GE_LOGIC_OR_REVERSE: p.C(" v32 = v32 | ~d32;\n"); break;
case GE_LOGIC_COPY_INVERTED: p.C(" v32 = ~v32;\n"); break;
case GE_LOGIC_OR_INVERTED: p.C(" v32 = (~v32) | d32;\n"); break;
case GE_LOGIC_NAND: p.C(" v32 = ~(v32 & d32);\n"); break;
case GE_LOGIC_SET: p.C(" v32 = 0xFFFFFFFFu;\n"); break;
}
// Note that the mask has already been flipped to the PC way - 1 means write.
if (colorWriteMask) {
WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n");
}
WRITE(p, " %s = unpackUnorm4x8(v32);\n", compat.fragColor0);
}

View File

@ -268,6 +268,7 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
ReplaceBlendType replaceBlend = pipelineState.blendState.replaceBlend;
ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil;
SimulateLogicOpType simulateLogicOpType = pipelineState.blendState.simulateLogicOpType;
GELogicOp replaceLogicOpType = GE_LOGIC_COPY;
// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.
@ -325,6 +326,9 @@ void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pip
// 2 bits.
id.SetBits(FS_BIT_SIMULATE_LOGIC_OP_TYPE, 2, simulateLogicOpType);
// 4 bits. Set to GE_LOGIC_COPY if not used, which does nothing in the shader generator.
id.SetBits(FS_BIT_REPLACE_LOGIC_OP, 4, (int)replaceLogicOpType);
// If replaceBlend == REPLACE_BLEND_STANDARD (or REPLACE_BLEND_NO) nothing is done, so we kill these bits.
if (replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) {
id.SetBits(FS_BIT_REPLACE_BLEND, 3, replaceBlend);

View File

@ -95,6 +95,7 @@ enum FShaderBit : uint8_t {
FS_BIT_COLOR_WRITEMASK = 50,
FS_BIT_3D_TEXTURE = 51,
FS_BIT_SHADER_SMOOTHED_DEPAL = 52,
FS_BIT_REPLACE_LOGIC_OP = 53, // 4 bits. GE_LOGIC_COPY means no-op/off.
};
static inline FShaderBit operator +(FShaderBit bit, int i) {

View File

@ -370,7 +370,7 @@ VulkanFragmentShader *ShaderManagerVulkan::GetFragmentShaderFromModule(VkShaderM
// instantaneous.
#define CACHE_HEADER_MAGIC 0xff51f420
#define CACHE_VERSION 21
#define CACHE_VERSION 22
struct VulkanCacheHeader {
uint32_t magic;
uint32_t version;