Merge pull request #15946 from hrydgard/shader-blend-refactor

Shader blend refactor part 1
This commit is contained in:
Unknown W. Brackets 2022-09-02 20:14:10 -07:00 committed by GitHub
commit df624d8651
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
22 changed files with 121 additions and 111 deletions

View File

@ -23,6 +23,7 @@
#include "Common/Data/Collections/Hashmaps.h"
#include "GPU/GPUState.h"
#include "GPU/Common/GPUStateUtils.h"
#include "GPU/Common/GPUDebugInterface.h"
#include "GPU/Common/IndexGenerator.h"
#include "GPU/Common/VertexDecoderCommon.h"
@ -192,6 +193,8 @@ protected:
// Sometimes, unusual situations mean we need to reset dirty flags after state calc finishes.
uint64_t dirtyRequiresRecheck_ = 0;
ComputedPipelineState pipelineState_;
// Hardware tessellation
TessellationDataTransfer *tessDataTransfer;
};

View File

@ -121,7 +121,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL);
bool readFramebuffer = replaceBlend == REPLACE_BLEND_COPY_FBO || colorWriteMask;
bool readFramebuffer = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask;
bool readFramebufferTex = readFramebuffer && !gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH);
bool needFragCoord = readFramebuffer || gstate_c.Supports(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
@ -943,7 +943,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLangu
WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor);
}
if (replaceBlend == REPLACE_BLEND_COPY_FBO) {
if (replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER) {
const char *srcFactor = nullptr;
const char *dstFactor = nullptr;

View File

@ -179,7 +179,7 @@ ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
return REPLACE_ALPHA_NO;
}
if (replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_COPY_FBO) {
if (replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_READ_FRAMEBUFFER) {
if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
return REPLACE_ALPHA_YES;
} else {
@ -270,14 +270,14 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
// Let's get the non-factor modes out of the way first.
switch (eq) {
case GE_BLENDMODE_ABSDIFF:
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_BLENDMODE_MIN:
case GE_BLENDMODE_MAX:
if (gstate_c.Supports(GPU_SUPPORTS_BLEND_MINMAX)) {
return REPLACE_BLEND_STANDARD;
} else {
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
}
default:
@ -300,19 +300,19 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
return REPLACE_BLEND_2X_ALPHA;
// Can't double, we need the source color to be correct.
// Doubling only alpha would clamp the src alpha incorrectly.
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_DOUBLEDSTALPHA:
case GE_DSTBLEND_DOUBLEINVDSTALPHA:
if (bufferFormat == GE_FORMAT_565)
return REPLACE_BLEND_2X_ALPHA;
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_DOUBLESRCALPHA:
// We can't technically do this correctly (due to clamping) without reading the dst color.
// Using a copy isn't accurate either, though, when there's overlap.
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH))
return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
@ -339,7 +339,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
return REPLACE_BLEND_STANDARD;
}
// Can't double, we need the source color to be correct.
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_DOUBLEDSTALPHA:
case GE_DSTBLEND_DOUBLEINVDSTALPHA:
@ -348,7 +348,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
// Doubling will have no effect here.
return REPLACE_BLEND_STANDARD;
}
return !allowFramebufferRead ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_DOUBLESRCALPHA:
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
@ -357,7 +357,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
}
// Double both src (for dst alpha) and alpha (for dst factor.)
// But to be accurate (clamping), we need to read the dst color.
return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_SRCALPHA:
case GE_DSTBLEND_INVSRCALPHA:
@ -369,7 +369,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
return REPLACE_BLEND_STANDARD;
}
// We can't technically do this correctly (due to clamping) without reading the dst alpha.
return !allowFramebufferRead ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_SRC : REPLACE_BLEND_READ_FRAMEBUFFER;
}
case GE_SRCBLEND_DOUBLEINVDSTALPHA:
@ -383,14 +383,14 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
if (bufferFormat == GE_FORMAT_565) {
return REPLACE_BLEND_STANDARD;
}
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_DOUBLESRCALPHA:
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
if (bufferFormat == GE_FORMAT_565) {
return REPLACE_BLEND_2X_ALPHA;
}
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_SRCALPHA:
case GE_DSTBLEND_INVSRCALPHA:
@ -401,7 +401,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
if (bufferFormat == GE_FORMAT_565) {
return REPLACE_BLEND_STANDARD;
}
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
}
case GE_SRCBLEND_FIXA:
@ -409,7 +409,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
switch (funcB) {
case GE_DSTBLEND_DOUBLESRCALPHA:
// Can't safely double alpha, will clamp.
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_DOUBLEINVSRCALPHA:
// Doubling alpha is safe for the inverse, will clamp to zero either way.
@ -420,7 +420,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
if (bufferFormat == GE_FORMAT_565) {
return REPLACE_BLEND_STANDARD;
}
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
case GE_DSTBLEND_FIXB:
default:
@ -454,14 +454,14 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {
// Can't safely double alpha, will clamp. However, a copy may easily be worse due to overlap.
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH))
return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
} else {
// This means dst alpha/color is used in the src factor.
// Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?)
// We will just hope that doubling alpha for the dst factor will not clamp too badly.
if (gstate_c.Supports(GPU_SUPPORTS_ANY_FRAMEBUFFER_FETCH))
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
return REPLACE_BLEND_2X_ALPHA;
}
@ -478,7 +478,7 @@ ReplaceBlendType ReplaceBlendWithShader(bool allowFramebufferRead, GEBufferForma
if (bufferFormat == GE_FORMAT_565) {
return REPLACE_BLEND_STANDARD;
}
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_COPY_FBO;
return !allowFramebufferRead ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
default:
return REPLACE_BLEND_STANDARD;
@ -956,7 +956,7 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
case STENCIL_VALUE_INCR_4:
case STENCIL_VALUE_INCR_8:
// We'll add the incremented value output by the shader.
blendState.enabled = true;
blendState.blendEnabled = true;
blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
blendState.setEquation(blendEq, BlendEq::ADD);
break;
@ -964,23 +964,23 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
case STENCIL_VALUE_DECR_4:
case STENCIL_VALUE_DECR_8:
// We'll subtract the incremented value output by the shader.
blendState.enabled = true;
blendState.blendEnabled = true;
blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
blendState.setEquation(blendEq, BlendEq::SUBTRACT);
break;
case STENCIL_VALUE_INVERT:
// The shader will output one, and reverse subtracting will essentially invert.
blendState.enabled = true;
blendState.blendEnabled = true;
blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);
blendState.setEquation(blendEq, BlendEq::REVERSE_SUBTRACT);
break;
default:
if (srcBlend == BlendFactor::ONE && dstBlend == BlendFactor::ZERO && blendEq == BlendEq::ADD) {
blendState.enabled = false;
blendState.blendEnabled = false;
} else {
blendState.enabled = true;
blendState.blendEnabled = true;
blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ZERO);
blendState.setEquation(blendEq, BlendEq::ADD);
}
@ -1020,38 +1020,42 @@ bool IsColorWriteMaskComplex(bool allowFramebufferRead) {
// by modern hardware, we do that. This is 99.9% of the time.
// When that's not enough, we fall back on a technique similar to shader blending,
// we read from the framebuffer (or a copy of it).
// We also prepare uniformMask so that if doing this in the shader gets forced-on,
// we have the right mask already.
void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
if (gstate_c.blueToAlpha) {
maskState.applyFramebufferRead = false;
maskState.rgba[0] = false;
maskState.rgba[1] = false;
maskState.rgba[2] = false;
maskState.rgba[3] = true;
maskState.uniformMask = 0xFF000000;
maskState.maskRGBA[0] = false;
maskState.maskRGBA[1] = false;
maskState.maskRGBA[2] = false;
maskState.maskRGBA[3] = true;
return;
}
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));
maskState.uniformMask = colorMask;
maskState.applyFramebufferRead = false;
for (int i = 0; i < 4; i++) {
int channelMask = colorMask & 0xFF;
switch (channelMask) {
case 0x0:
maskState.rgba[i] = false;
maskState.maskRGBA[i] = false;
break;
case 0xFF:
maskState.rgba[i] = true;
maskState.maskRGBA[i] = true;
break;
default:
if (allowFramebufferRead) {
// Instead of just 'true', restrict shader bitmasks to Outrun temporarily.
// TODO: This check must match the one in IsColorWriteMaskComplex.
maskState.applyFramebufferRead = PSP_CoreParameter().compat.flags().ShaderColorBitmask;
maskState.rgba[i] = true;
maskState.maskRGBA[i] = true;
} else {
// Use the old heuristic.
maskState.rgba[i] = channelMask >= 128;
maskState.maskRGBA[i] = channelMask >= 128;
}
}
colorMask >>= 8;
@ -1059,10 +1063,10 @@ void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead) {
// Let's not write to alpha if stencil isn't enabled.
if (IsStencilTestOutputDisabled()) {
maskState.rgba[3] = false;
maskState.maskRGBA[3] = false;
} else if (ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
// If the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
maskState.rgba[3] = false;
maskState.maskRGBA[3] = false;
}
}
@ -1081,13 +1085,16 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead,
blendState.applyFramebufferRead = false;
blendState.dirtyShaderBlendFixValues = false;
blendState.useBlendColor = false;
blendState.replaceAlphaWithStencil = REPLACE_ALPHA_NO;
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(allowFramebufferRead, gstate_c.framebufFormat);
if (forceReplaceBlend) {
replaceBlend = REPLACE_BLEND_COPY_FBO;
replaceBlend = REPLACE_BLEND_READ_FRAMEBUFFER;
}
blendState.replaceBlend = replaceBlend;
ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend);
blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;
bool usePreSrc = false;
bool blueToAlpha = false;
@ -1101,27 +1108,26 @@ void ConvertBlendState(GenericBlendState &blendState, bool allowFramebufferRead,
case REPLACE_BLEND_BLUE_TO_ALPHA:
blueToAlpha = true;
blendState.enabled = gstate.isAlphaBlendEnabled();
blendState.blendEnabled = gstate.isAlphaBlendEnabled();
// We'll later convert the color blend to blend in the alpha channel.
break;
case REPLACE_BLEND_COPY_FBO:
blendState.enabled = true;
case REPLACE_BLEND_READ_FRAMEBUFFER:
blendState.blendEnabled = true;
blendState.applyFramebufferRead = true;
blendState.resetFramebufferRead = false;
blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;
break;
case REPLACE_BLEND_PRE_SRC:
case REPLACE_BLEND_PRE_SRC_2X_ALPHA:
blendState.enabled = true;
blendState.blendEnabled = true;
usePreSrc = true;
break;
case REPLACE_BLEND_STANDARD:
case REPLACE_BLEND_2X_ALPHA:
case REPLACE_BLEND_2X_SRC:
blendState.enabled = true;
blendState.blendEnabled = true;
break;
}

View File

@ -39,7 +39,7 @@ enum ReplaceBlendType {
// Full blend equation runs in shader.
// We might have to make a copy of the framebuffer target to read from.
REPLACE_BLEND_COPY_FBO,
REPLACE_BLEND_READ_FRAMEBUFFER,
// Color blend mode and color gets copied to alpha blend mode.
REPLACE_BLEND_BLUE_TO_ALPHA,
@ -137,12 +137,18 @@ enum class BlendEq : uint8_t {
COUNT
};
// Computed blend setup, including shader stuff.
struct GenericBlendState {
bool enabled;
bool resetFramebufferRead;
bool applyFramebufferRead;
bool dirtyShaderBlendFixValues;
// Shader generation state
ReplaceAlphaType replaceAlphaWithStencil;
ReplaceBlendType replaceBlend;
// Resulting hardware blend state
bool blendEnabled;
BlendFactor srcColor;
BlendFactor dstColor;
@ -181,7 +187,7 @@ void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithS
struct GenericMaskState {
bool applyFramebufferRead;
uint32_t uniformMask; // For each bit, opposite to the PSP.
bool rgba[4]; // true = draw, false = don't draw this channel
bool maskRGBA[4]; // true = draw, false = don't draw this channel
};
void ConvertMaskState(GenericMaskState &maskState, bool allowFramebufferRead);
@ -197,9 +203,14 @@ struct GenericStencilFuncState {
GEStencilOp zFail;
GEStencilOp zPass;
};
void ConvertStencilFuncState(GenericStencilFuncState &stencilFuncState);
// Bundles the blend and color-mask state produced by ConvertBlendState() and
// ConvertMaskState() during draw state application, so the same results can be
// passed on to ComputeFragmentShaderID() instead of being recomputed there.
struct ComputedPipelineState {
// Filled in by ConvertBlendState(). ComputeFragmentShaderID() reads replaceBlend
// and replaceAlphaWithStencil from here.
GenericBlendState blendState;
// Filled in by ConvertMaskState(). ComputeFragmentShaderID() reads
// applyFramebufferRead from here as its colorWriteMask flag.
GenericMaskState maskState;
// TODO: Add logic and possibly stencil here.
};
// See issue #15898
inline bool SpongebobDepthInverseConditions(const GenericStencilFuncState &stencilState) {
// Check that the depth/stencil state matches the conditions exactly.

View File

@ -246,7 +246,7 @@ std::string FragmentShaderDesc(const FShaderID &id) {
// Here we must take all the bits of the gstate that determine what the fragment shader will
// look like, and concatenate them together into an ID.
void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
void ComputeFragmentShaderID(FShaderID *id_out, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs) {
FShaderID id;
if (gstate.isModeClear()) {
// We only need one clear shader, so let's ignore the rest of the bits.
@ -263,15 +263,15 @@ void ComputeFragmentShaderID(FShaderID *id_out, const Draw::Bugs &bugs) {
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
bool useShaderDepal = gstate_c.useShaderDepal;
bool useSmoothedDepal = gstate_c.useSmoothedShaderDepal;
bool colorWriteMask = IsColorWriteMaskComplex(gstate_c.allowFramebufferRead);
bool colorWriteMask = pipelineState.maskState.applyFramebufferRead;
// Note how we here recompute some of the work already done in state mapping.
// Not ideal! At least we share the code.
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.allowFramebufferRead, gstate_c.framebufFormat);
if (colorWriteMask) {
replaceBlend = REPLACE_BLEND_COPY_FBO;
}
ReplaceAlphaType stencilToAlpha = ReplaceAlphaWithStencil(replaceBlend);
ReplaceBlendType replaceBlend = pipelineState.blendState.replaceBlend;
ReplaceAlphaType stencilToAlpha = pipelineState.blendState.replaceAlphaWithStencil;
// For debugging, can probably delete soon.
// _assert_(colorWriteMask == IsColorWriteMaskComplex(gstate_c.allowFramebufferRead));
// _assert_(replaceBlend == ReplaceBlendWithShader(gstate_c.allowFramebufferRead, gstate_c.framebufFormat));
// _assert_(stencilToAlpha == ReplaceAlphaWithStencil(replaceBlend));
// All texfuncs except replace are the same for RGB as for RGBA with full alpha.
// Note that checking this means that we must dirty the fragment shader ID whenever textureFullAlpha changes.

View File

@ -226,11 +226,11 @@ namespace Draw {
class Bugs;
}
void ComputeVertexShaderID(VShaderID *id, uint32_t vertexType, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat);
// Generates a compact string that describes the shader. Useful in a list to get an overview
// of the current flora of shaders.
std::string VertexShaderDesc(const VShaderID &id);
void ComputeFragmentShaderID(FShaderID *id, const Draw::Bugs &bugs);
struct ComputedPipelineState;
void ComputeFragmentShaderID(FShaderID *id, const ComputedPipelineState &pipelineState, const Draw::Bugs &bugs);
std::string FragmentShaderDesc(const FShaderID &id);

View File

@ -538,7 +538,7 @@ rotateVBO:
D3D11VertexShader *vshader;
D3D11FragmentShader *fshader;
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, useHWTransform, useHWTessellation_, decOptions_.expandAllWeightsToFloat);
ID3D11InputLayout *inputLayout = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
context_->PSSetShader(fshader->GetShader(), nullptr, 0);
context_->VSSetShader(vshader->GetShader(), nullptr, 0);
@ -648,7 +648,7 @@ rotateVBO:
if (result.action == SW_DRAW_PRIMITIVES) {
D3D11VertexShader *vshader;
D3D11FragmentShader *fshader;
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, false, false, decOptions_.expandAllWeightsToFloat);
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat);
context_->PSSetShader(fshader->GetShader(), nullptr, 0);
context_->VSSetShader(vshader->GetShader(), nullptr, 0);
shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());

View File

@ -177,7 +177,7 @@ void ShaderManagerD3D11::BindUniforms() {
context_->PSSetConstantBuffers(0, 1, ps_cbs);
}
void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat) {
void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat) {
VShaderID VSID;
FShaderID FSID;
@ -190,7 +190,7 @@ void ShaderManagerD3D11::GetShaders(int prim, u32 vertType, D3D11VertexShader **
if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
ComputeFragmentShaderID(&FSID, draw_->GetBugs());
ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
} else {
FSID = lastFSID_;
}

View File

@ -85,7 +85,7 @@ public:
ShaderManagerD3D11(Draw::DrawContext *draw, ID3D11Device *device, ID3D11DeviceContext *context, D3D_FEATURE_LEVEL featureLevel);
~ShaderManagerD3D11();
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat);
void GetShaders(int prim, u32 vertType, D3D11VertexShader **vshader, D3D11FragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat);
void ClearShaders();
void DirtyLastShader() override;

View File

@ -154,11 +154,9 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
} else {
keys_.blend.value = 0;
GenericMaskState maskState;
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
// Set blend - unless we need to do it in the shader.
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead, maskState.applyFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
@ -186,7 +184,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
if (blendState.enabled) {
if (blendState.blendEnabled) {
keys_.blend.blendEnable = true;
keys_.blend.logicOpEnable = false;
keys_.blend.blendOpColor = d3d11BlendEqLookup[(size_t)blendState.eqColor];
@ -219,7 +217,7 @@ void DrawEngineD3D11::ApplyDrawState(int prim) {
}
}
keys_.blend.colorWriteMask = (maskState.rgba[0] ? 1 : 0) | (maskState.rgba[1] ? 2 : 0) | (maskState.rgba[2] ? 4 : 0) | (maskState.rgba[3] ? 8 : 0);
keys_.blend.colorWriteMask = (maskState.maskRGBA[0] ? 1 : 0) | (maskState.maskRGBA[1] ? 2 : 0) | (maskState.maskRGBA[2] ? 4 : 0) | (maskState.maskRGBA[3] ? 8 : 0);
}
}

View File

@ -522,7 +522,7 @@ rotateVBO:
ApplyDrawState(prim);
ApplyDrawStateLate();
VSShader *vshader = shaderManager_->ApplyShader(true, useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat);
VSShader *vshader = shaderManager_->ApplyShader(true, useHWTessellation_, lastVType_, decOptions_.expandAllWeightsToFloat, pipelineState_);
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
if (pHardwareVertexDecl) {
@ -613,7 +613,7 @@ rotateVBO:
ApplyDrawStateLate();
VSShader *vshader = shaderManager_->ApplyShader(false, false, lastVType_, decOptions_.expandAllWeightsToFloat);
VSShader *vshader = shaderManager_->ApplyShader(false, false, lastVType_, decOptions_.expandAllWeightsToFloat, pipelineState_);
if (result.action == SW_DRAW_PRIMITIVES) {
if (result.setStencil) {

View File

@ -544,7 +544,7 @@ void ShaderManagerDX9::DirtyLastShader() { // disables vertex arrays
lastPShader_ = nullptr;
}
VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat) {
VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat, const ComputedPipelineState &pipelineState) {
VShaderID VSID;
if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
@ -556,7 +556,7 @@ VSShader *ShaderManagerDX9::ApplyShader(bool useHWTransform, bool useHWTessellat
FShaderID FSID;
if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
ComputeFragmentShaderID(&FSID, draw_->GetBugs());
ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
} else {
FSID = lastFSID_;
}

View File

@ -78,7 +78,7 @@ public:
~ShaderManagerDX9();
void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected
VSShader *ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat);
VSShader *ApplyShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat, const ComputedPipelineState &pipelineState);
void DirtyShader();
void DirtyLastShader() override;

View File

@ -132,11 +132,9 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
}
dxstate.colorMask.set(mask);
} else {
GenericMaskState maskState;
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
// Set blend - unless we need to do it in the shader.
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead, maskState.applyFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
@ -162,7 +160,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
ResetFramebufferRead();
}
if (blendState.enabled) {
if (blendState.blendEnabled) {
dxstate.blend.enable();
dxstate.blendSeparate.enable();
dxstate.blendEquation.set(dxBlendEqLookup[(size_t)blendState.eqColor], dxBlendEqLookup[(size_t)blendState.eqAlpha]);
@ -182,7 +180,7 @@ void DrawEngineDX9::ApplyDrawState(int prim) {
u32 mask = 0;
for (int i = 0; i < 4; i++) {
if (maskState.rgba[i])
if (maskState.maskRGBA[i])
mask |= 1 << i;
}
dxstate.colorMask.set(mask);

View File

@ -315,7 +315,7 @@ void DrawEngineGLES::DoFlush() {
ApplyDrawState(prim);
ApplyDrawStateLate(false, 0);
LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, lastVType_, framebufferManager_->UseBufferedRendering());
LinkedShader *program = shaderManager_->ApplyFragmentShader(vsid, vshader, pipelineState_, lastVType_, framebufferManager_->UseBufferedRendering());
GLRInputLayout *inputLayout = SetupDecFmtForDraw(program, dec_->GetDecVtxFmt());
render_->BindVertexBuffer(inputLayout, vertexBuffer, vertexBufferOffset);
if (useElements) {
@ -405,7 +405,7 @@ void DrawEngineGLES::DoFlush() {
ApplyDrawStateLate(result.setStencil, result.stencilValue);
shaderManager_->ApplyFragmentShader(vsid, vshader, lastVType_, framebufferManager_->UseBufferedRendering());
shaderManager_->ApplyFragmentShader(vsid, vshader, pipelineState_, lastVType_, framebufferManager_->UseBufferedRendering());
if (result.action == SW_DRAW_PRIMITIVES) {
if (result.drawIndexed) {

View File

@ -773,7 +773,7 @@ Shader *ShaderManagerGLES::ApplyVertexShader(bool useHWTransform, bool useHWTess
return vs;
}
LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, u32 vertType, bool useBufferedRendering) {
LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, const ComputedPipelineState &pipelineState, u32 vertType, bool useBufferedRendering) {
uint64_t dirty = gstate_c.GetDirtyUniforms();
if (dirty) {
if (lastShader_)
@ -785,7 +785,7 @@ LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs,
FShaderID FSID;
if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
ComputeFragmentShaderID(&FSID, draw_->GetBugs());
ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
} else {
FSID = lastFSID_;
}

View File

@ -159,7 +159,7 @@ public:
// This is the old ApplyShader split into two parts, because of annoying information dependencies.
// If you call ApplyVertexShader, you MUST call ApplyFragmentShader soon afterwards.
Shader *ApplyVertexShader(bool useHWTransform, bool useHWTessellation, u32 vertType, bool weightsAsFloat, VShaderID *VSID);
LinkedShader *ApplyFragmentShader(VShaderID VSID, Shader *vs, u32 vertType, bool useBufferedRendering);
LinkedShader *ApplyFragmentShader(VShaderID VSID, Shader *vs, const ComputedPipelineState &pipelineState, u32 vertType, bool useBufferedRendering);
void DeviceLost();
void DeviceRestore(Draw::DrawContext *draw);

View File

@ -153,13 +153,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
bool alphaMask = gstate.isClearModeAlphaMask();
renderManager->SetNoBlendAndMask((colorMask ? 7 : 0) | (alphaMask ? 8 : 0));
} else {
// Do the large chunks of state conversion. We might be able to hide these two behind a dirty-flag each,
// to avoid recomputing heavy stuff unnecessarily every draw call.
GenericMaskState maskState;
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead, maskState.applyFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
@ -189,7 +185,7 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
if (blendState.enabled) {
if (blendState.blendEnabled) {
if (blendState.dirtyShaderBlendFixValues) {
// Not quite sure how necessary this is.
dirtyRequiresRecheck_ |= DIRTY_SHADERBLEND;
@ -203,9 +199,9 @@ void DrawEngineGLES::ApplyDrawState(int prim) {
}
}
int mask = (int)maskState.rgba[0] | ((int)maskState.rgba[1] << 1) | ((int)maskState.rgba[2] << 2) | ((int)maskState.rgba[3] << 3);
if (blendState.enabled) {
renderManager->SetBlendAndMask(mask, blendState.enabled,
int mask = (int)maskState.maskRGBA[0] | ((int)maskState.maskRGBA[1] << 1) | ((int)maskState.maskRGBA[2] << 2) | ((int)maskState.maskRGBA[3] << 3);
if (blendState.blendEnabled) {
renderManager->SetBlendAndMask(mask, blendState.blendEnabled,
glBlendFactorLookup[(size_t)blendState.srcColor], glBlendFactorLookup[(size_t)blendState.dstColor],
glBlendFactorLookup[(size_t)blendState.srcAlpha], glBlendFactorLookup[(size_t)blendState.dstAlpha],
glBlendEqLookup[(size_t)blendState.eqColor], glBlendEqLookup[(size_t)blendState.eqAlpha]);

View File

@ -761,7 +761,7 @@ void DrawEngineVulkan::DoFlush() {
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
}
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat); // usehwtransform
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat); // usehwtransform
if (!vshader) {
// We're screwed.
return;
@ -890,11 +890,11 @@ void DrawEngineVulkan::DoFlush() {
sampler = nullSampler_;
}
if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE) || prim != lastPrim_) {
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, false, false, decOptions_.expandAllWeightsToFloat); // usehwtransform
_dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
}
shaderManager_->GetShaders(prim, lastVType_, &vshader, &fshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat); // usehwtransform
_dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
Draw::NativeObject object = framebufferManager_->UseBufferedRendering() ? Draw::NativeObject::FRAMEBUFFER_RENDERPASS : Draw::NativeObject::BACKBUFFER_RENDERPASS;
VkRenderPass renderPass = (VkRenderPass)draw_->GetNativeObject(object);
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, renderPass, pipelineKey_, &dec_->decFmt, vshader, fshader, false);

View File

@ -217,7 +217,7 @@ uint64_t ShaderManagerVulkan::UpdateUniforms(bool useBufferedRendering) {
return dirty;
}
void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat) {
void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat) {
VShaderID VSID;
if (gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
@ -229,7 +229,7 @@ void ShaderManagerVulkan::GetShaders(int prim, u32 vertType, VulkanVertexShader
FShaderID FSID;
if (gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
ComputeFragmentShaderID(&FSID, draw_->GetBugs());
ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
} else {
FSID = lastFSID_;
}

View File

@ -89,7 +89,7 @@ public:
void DeviceLost();
void DeviceRestore(Draw::DrawContext *draw);
void GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat);
void GetShaders(int prim, u32 vertType, VulkanVertexShader **vshader, VulkanFragmentShader **fshader, const ComputedPipelineState &pipelineState, bool useHWTransform, bool useHWTessellation, bool weightsAsFloat);
void ClearShaders();
void DirtyShader();
void DirtyLastShader() override;

View File

@ -127,7 +127,6 @@ void DrawEngineVulkan::ResetFramebufferRead() {
fboTexBound_ = false;
}
// TODO: Do this more progressively. No need to compute the entire state if the entire state hasn't changed.
// In Vulkan, we simply collect all the state together into a "pipeline key" - we don't actually set any state here
// (the caller is responsible for setting the little dynamic state that is supported, dynState).
void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManager, ShaderManagerVulkan *shaderManager, int prim, VulkanPipelineRasterStateKey &key, VulkanDynamicState &dynState) {
@ -162,16 +161,15 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
key.logicOp = VK_LOGIC_OP_CLEAR;
}
GenericMaskState maskState;
GenericMaskState &maskState = pipelineState_.maskState;
GenericBlendState &blendState = pipelineState_.blendState;
ConvertMaskState(maskState, gstate_c.allowFramebufferRead);
// Set blend - unless we need to do it in the shader.
GenericBlendState blendState;
ConvertBlendState(blendState, gstate_c.allowFramebufferRead, maskState.applyFramebufferRead);
if (blendState.applyFramebufferRead || maskState.applyFramebufferRead) {
ApplyFramebufferRead(&fboTexNeedsBind_);
// The shader takes over the responsibility for blending, so recompute.
// We might still end up using blend to write something to alpha.
ApplyStencilReplaceAndLogicOpIgnoreBlend(blendState.replaceAlphaWithStencil, blendState);
dirtyRequiresRecheck_ |= DIRTY_FRAGMENTSHADER_STATE;
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
@ -181,7 +179,7 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
gstate_c.Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
if (blendState.enabled) {
if (blendState.blendEnabled) {
key.blendEnable = true;
key.blendOpColor = vkBlendEqLookup[(size_t)blendState.eqColor];
key.blendOpAlpha = vkBlendEqLookup[(size_t)blendState.eqAlpha];
@ -209,10 +207,10 @@ void DrawEngineVulkan::ConvertStateToVulkanKey(FramebufferManagerVulkan &fbManag
}
key.colorWriteMask =
(maskState.rgba[0] ? VK_COLOR_COMPONENT_R_BIT : 0) |
(maskState.rgba[1] ? VK_COLOR_COMPONENT_G_BIT : 0) |
(maskState.rgba[2] ? VK_COLOR_COMPONENT_B_BIT : 0) |
(maskState.rgba[3] ? VK_COLOR_COMPONENT_A_BIT : 0);
(maskState.maskRGBA[0] ? VK_COLOR_COMPONENT_R_BIT : 0) |
(maskState.maskRGBA[1] ? VK_COLOR_COMPONENT_G_BIT : 0) |
(maskState.maskRGBA[2] ? VK_COLOR_COMPONENT_B_BIT : 0) |
(maskState.maskRGBA[3] ? VK_COLOR_COMPONENT_A_BIT : 0);
// Workaround proposed in #10421, for bug where the color write mask is not applied correctly on Adreno.
if ((gstate.pmskc & 0x00FFFFFF) == 0x00FFFFFF && g_Config.bVendorBugChecksEnabled && draw_->GetBugs().Has(Draw::Bugs::COLORWRITEMASK_BROKEN_WITH_DEPTHTEST)) {