mirror of
https://github.com/libretro/ppsspp.git
synced 2025-01-31 13:52:21 +00:00
GPU: Apply color test after doubling.
This commit is contained in:
parent
8cdead90f9
commit
703181607e
@ -233,7 +233,7 @@ void ComputeFragmentShaderID(ShaderID *id_out) {
|
||||
bool enableFog = gstate.isFogEnabled() && !isModeThrough;
|
||||
bool enableAlphaTest = gstate.isAlphaTestEnabled() && !IsAlphaTestTriviallyTrue();
|
||||
bool enableColorTest = gstate.isColorTestEnabled() && !IsColorTestTriviallyTrue();
|
||||
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled();
|
||||
bool enableColorDoubling = gstate.isColorDoublingEnabled() && gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_MODULATE;
|
||||
bool doTextureProjection = (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX && MatrixNeedsProjection(gstate.tgenMatrix));
|
||||
bool doTextureAlpha = gstate.isTextureAlphaUsed();
|
||||
bool doFlatShading = gstate.getShadeMode() == GE_SHADE_FLAT;
|
||||
|
@ -304,6 +304,11 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
|
||||
}
|
||||
}
|
||||
if (enableColorTest) {
|
||||
// Color doubling happens before the color test, but we try to optimize doubling when test is off.
|
||||
if (enableColorDoubling) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
|
||||
if (colorTestAgainstZero) {
|
||||
// When testing against 0 (common), we can avoid some math.
|
||||
// 0.002 is approximately half of 1.0 / 255.0.
|
||||
@ -322,14 +327,14 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
|
||||
if (colorTestFuncs[colorTestFunc][0] != '#') {
|
||||
const char *test = colorTestFuncs[colorTestFunc];
|
||||
if (lang == HLSL_D3D11) {
|
||||
WRITE(p, " uint3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
|
||||
WRITE(p, " uint3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
|
||||
WRITE(p, " uint3 v_masked = v_scaled & u_alphacolormask.rgb;\n");
|
||||
WRITE(p, " uint3 colorTestRef = u_alphacolorref.rgb & u_alphacolormask.rgb;\n");
|
||||
// We have to test the components separately, or we get incorrect results. See #10629.
|
||||
WRITE(p, " if (v_masked.r %s colorTestRef.r && v_masked.g %s colorTestRef.g && v_masked.b %s colorTestRef.b) discard;\n", test, test, test);
|
||||
} else {
|
||||
// TODO: Use a texture to lookup bitwise ops instead?
|
||||
WRITE(p, " float3 colortest = roundAndScaleTo255v(v.rgb);\n");
|
||||
WRITE(p, " float3 colortest = roundAndScaleTo255v(clamp(v.rgb, 0.0, 1.0));\n");
|
||||
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) clip(-1);\n", test, test, test);
|
||||
}
|
||||
}
|
||||
@ -337,13 +342,17 @@ bool GenerateFragmentShaderHLSL(const FShaderID &id, char *buffer, ShaderLanguag
|
||||
WRITE(p, lang == HLSL_DX9 ? " clip(-1);\n" : " discard;\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Color doubling happens after the color test.
|
||||
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
|
||||
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
} else {
|
||||
// If there's no color test, we can potentially double and replace blend at once.
|
||||
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
|
||||
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (enableFog) {
|
||||
|
@ -558,6 +558,14 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
|
||||
}
|
||||
|
||||
if (enableColorTest) {
|
||||
// Color doubling happens before the color test, but we try to optimize doubling when test is off.
|
||||
if (enableColorDoubling) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
if (g_Config.bFragmentTestCache && !colorTestAgainstZero) {
|
||||
WRITE(p, " vScale256.rgb = vScale256.rgb * 2.0;\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (colorTestAgainstZero) {
|
||||
// When testing against 0 (common), we can avoid some math.
|
||||
// 0.002 is approximately half of 1.0 / 255.0.
|
||||
@ -576,7 +584,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
|
||||
WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", texture);
|
||||
WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", texture);
|
||||
if (colorTestFunc == GE_COMP_EQUAL) {
|
||||
// Equal means all parts must be equal.
|
||||
// Equal means all parts must be equal (so discard if any is not.)
|
||||
WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
|
||||
} else {
|
||||
// Not equal means any part must be not equal.
|
||||
@ -587,7 +595,7 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
|
||||
if (colorTestFuncs[colorTestFunc][0] != '#') {
|
||||
if (bitwiseOps) {
|
||||
// Apparently GLES3 does not support vector bitwise ops.
|
||||
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
|
||||
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
|
||||
const char *maskedFragColor = "ivec3(v_scaled.r & u_alphacolormask.r, v_scaled.g & u_alphacolormask.g, v_scaled.b & u_alphacolormask.b)";
|
||||
const char *maskedColorRef = "ivec3(int(u_alphacolorref.r) & u_alphacolormask.r, int(u_alphacolorref.g) & u_alphacolormask.g, int(u_alphacolorref.b) & u_alphacolormask.b)";
|
||||
WRITE(p, " if (%s %s %s) %s\n", maskedFragColor, colorTestFuncs[colorTestFunc], maskedColorRef, discardStatement);
|
||||
@ -600,13 +608,17 @@ bool GenerateFragmentShader(const FShaderID &id, char *buffer, uint64_t *uniform
|
||||
WRITE(p, " %s\n", discardStatement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Color doubling happens after the color test.
|
||||
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
|
||||
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
} else {
|
||||
// If there's no color test, we can potentially double and replace blend at once.
|
||||
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
|
||||
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (enableFog) {
|
||||
|
@ -81,6 +81,8 @@ void FragmentTestCacheGLES::BindTestTexture(int slot) {
|
||||
GLRTexture *tex = CreateTestTexture(funcs, refs, masks, valid);
|
||||
lastTexture_ = tex;
|
||||
render_->BindTexture(slot, tex);
|
||||
// We only need to do this once for the texture.
|
||||
render_->SetTextureSampler(slot, GL_CLAMP_TO_EDGE, GL_CLAMP_TO_EDGE, GL_NEAREST, GL_NEAREST, 0.0f);
|
||||
FragmentTestTexture item;
|
||||
item.lastFrame = gpuStats.numFlips;
|
||||
item.texture = tex;
|
||||
|
@ -622,7 +622,13 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
|
||||
// We can be accurate up to 24 bit integers, should be enough.
|
||||
const __m128 p = _mm_cvtepi32_ps(prim_color.ivec);
|
||||
const __m128 t = _mm_cvtepi32_ps(texcolor.ivec);
|
||||
out_rgb.ivec = _mm_cvtps_epi32(_mm_div_ps(_mm_mul_ps(p, t), _mm_set_ps1(255.0f)));
|
||||
const __m128 b = _mm_mul_ps(p, t);
|
||||
if (gstate.isColorDoublingEnabled()) {
|
||||
// We double right here, only for modulate. Other tex funcs do not color double.
|
||||
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(2.0f / 255.0f)));
|
||||
} else {
|
||||
out_rgb.ivec = _mm_cvtps_epi32(_mm_mul_ps(b, _mm_set_ps1(1.0f / 255.0f)));
|
||||
}
|
||||
|
||||
if (rgba) {
|
||||
return Vec4<int>(out_rgb.ivec);
|
||||
@ -630,7 +636,11 @@ static inline Vec4<int> GetTextureFunctionOutput(const Vec4<int>& prim_color, co
|
||||
out_a = prim_color.a();
|
||||
}
|
||||
#else
|
||||
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
|
||||
if (gstate.isColorDoublingEnabled()) {
|
||||
out_rgb = (prim_color.rgb() * texcolor.rgb() * 2) / 255;
|
||||
} else {
|
||||
out_rgb = prim_color.rgb() * texcolor.rgb() / 255;
|
||||
}
|
||||
out_a = (rgba) ? (prim_color.a() * texcolor.a() / 255) : prim_color.a();
|
||||
#endif
|
||||
break;
|
||||
@ -894,7 +904,7 @@ static inline Vec3<int> AlphaBlendingResult(const Vec4<int> &source, const Vec4<
|
||||
|
||||
template <bool clearMode>
|
||||
inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<int> &color_in) {
|
||||
Vec4<int> prim_color = color_in;
|
||||
Vec4<int> prim_color = color_in.Clamp(0, 255);
|
||||
// Depth range test - applied in clear mode, if not through mode.
|
||||
if (!gstate.isModeThrough())
|
||||
if (z < gstate.getDepthRangeMin() || z > gstate.getDepthRangeMax())
|
||||
@ -935,14 +945,6 @@ inline void DrawSinglePixel(const DrawingCoords &p, u16 z, u8 fog, const Vec4<in
|
||||
SetPixelDepth(p.x, p.y, z);
|
||||
}
|
||||
|
||||
// Doubling happens only when texturing is enabled, and after tests.
|
||||
if (gstate.isTextureMapEnabled() && gstate.isColorDoublingEnabled() && !clearMode) {
|
||||
// TODO: Does this need to be clamped before blending?
|
||||
prim_color.r() <<= 1;
|
||||
prim_color.g() <<= 1;
|
||||
prim_color.b() <<= 1;
|
||||
}
|
||||
|
||||
if (gstate.isFogEnabled() && !gstate.isModeThrough() && !clearMode) {
|
||||
Vec3<int> fogColor = Vec3<int>::FromRGB(gstate.fogcolor);
|
||||
fogColor = (prim_color.rgb() * (int)fog + fogColor * (255 - (int)fog)) / 255;
|
||||
|
@ -375,6 +375,11 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
|
||||
}
|
||||
|
||||
if (enableColorTest) {
|
||||
// Color doubling happens before the color test, but we try to optimize doubling when test is off.
|
||||
if (enableColorDoubling) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
|
||||
if (colorTestAgainstZero) {
|
||||
// When testing against 0 (common), we can avoid some math.
|
||||
// Have my doubts that this special case is actually worth it, but whatever.
|
||||
@ -392,19 +397,23 @@ bool GenerateVulkanGLSLFragmentShader(const FShaderID &id, char *buffer) {
|
||||
} else {
|
||||
const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
|
||||
if (colorTestFuncs[colorTestFunc][0] != '#') {
|
||||
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(v.rgb);\n");
|
||||
WRITE(p, " ivec3 v_scaled = roundAndScaleTo255iv(clamp(v.rgb, 0.0, 1.0));\n");
|
||||
WRITE(p, " if ((v_scaled & base.alphacolormask.rgb) %s (base.alphacolorref.rgb & base.alphacolormask.rgb)) %s\n", colorTestFuncs[colorTestFunc], discardStatement);
|
||||
} else {
|
||||
WRITE(p, " %s\n", discardStatement);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Color doubling happens after the color test.
|
||||
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
|
||||
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
} else {
|
||||
// If there's no color test, we can potentially double and replace blend at once.
|
||||
if (enableColorDoubling && replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 4.0;\n");
|
||||
} else if (enableColorDoubling || replaceBlend == REPLACE_BLEND_2X_SRC) {
|
||||
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (enableFog) {
|
||||
|
@ -373,6 +373,7 @@ int main(int argc, const char* argv[])
|
||||
g_Config.iSplineBezierQuality = 2;
|
||||
g_Config.bHighQualityDepth = true;
|
||||
g_Config.bMemStickInserted = true;
|
||||
g_Config.bFragmentTestCache = true;
|
||||
|
||||
#ifdef _WIN32
|
||||
InitSysDirectories();
|
||||
|
Loading…
x
Reference in New Issue
Block a user