mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 21:39:52 +00:00
Combined two uniforms to get the base UBO down to 512 bytes, so as not to waste space (NVIDIA needs 256-byte alignment)
This commit is contained in:
parent
c33c3cf3d4
commit
827481d41d
@ -398,7 +398,7 @@ bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) {
|
|||||||
if (stencilToAlpha != REPLACE_ALPHA_NO) {
|
if (stencilToAlpha != REPLACE_ALPHA_NO) {
|
||||||
switch (replaceAlphaWithStencilType) {
|
switch (replaceAlphaWithStencilType) {
|
||||||
case STENCIL_VALUE_UNIFORM:
|
case STENCIL_VALUE_UNIFORM:
|
||||||
replacedAlpha = "base.stencilReplaceValue";
|
replacedAlpha = "base.fogcoef_stencilreplace.z";
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case STENCIL_VALUE_ZERO:
|
case STENCIL_VALUE_ZERO:
|
||||||
|
@ -169,6 +169,10 @@ ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
|
|||||||
memset(&ub_base, 0, sizeof(ub_base));
|
memset(&ub_base, 0, sizeof(ub_base));
|
||||||
memset(&ub_lights, 0, sizeof(ub_lights));
|
memset(&ub_lights, 0, sizeof(ub_lights));
|
||||||
memset(&ub_bones, 0, sizeof(ub_bones));
|
memset(&ub_bones, 0, sizeof(ub_bones));
|
||||||
|
|
||||||
|
ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base));
|
||||||
|
ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights));
|
||||||
|
ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones));
|
||||||
}
|
}
|
||||||
|
|
||||||
ShaderManagerVulkan::~ShaderManagerVulkan() {
|
ShaderManagerVulkan::~ShaderManagerVulkan() {
|
||||||
@ -202,9 +206,6 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
|
|||||||
if (dirtyUniforms & DIRTY_FOGCOLOR) {
|
if (dirtyUniforms & DIRTY_FOGCOLOR) {
|
||||||
Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
|
Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
|
||||||
}
|
}
|
||||||
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
|
|
||||||
Uint8x1ToFloat4(ub_base.stencilReplace, gstate.getStencilTestRef());
|
|
||||||
}
|
|
||||||
if (dirtyUniforms & DIRTY_SHADERBLEND) {
|
if (dirtyUniforms & DIRTY_SHADERBLEND) {
|
||||||
Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
|
Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
|
||||||
Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
|
Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
|
||||||
@ -270,28 +271,31 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
|
|||||||
if (dirtyUniforms & DIRTY_TEXMATRIX) {
|
if (dirtyUniforms & DIRTY_TEXMATRIX) {
|
||||||
ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
|
ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
|
||||||
}
|
}
|
||||||
if (dirtyUniforms & DIRTY_FOGCOEF) {
|
|
||||||
float fogcoef[2] = {
|
// Combined two small uniforms
|
||||||
|
if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
|
||||||
|
float fogcoef_stencil[3] = {
|
||||||
getFloat24(gstate.fog1),
|
getFloat24(gstate.fog1),
|
||||||
getFloat24(gstate.fog2),
|
getFloat24(gstate.fog2),
|
||||||
|
(float)gstate.getStencilTestRef()
|
||||||
};
|
};
|
||||||
if (my_isinf(fogcoef[1])) {
|
if (my_isinf(fogcoef_stencil[1])) {
|
||||||
// not really sure what a sensible value might be.
|
// not really sure what a sensible value might be.
|
||||||
fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f;
|
fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
|
||||||
} else if (my_isnan(fogcoef[1])) {
|
} else if (my_isnan(fogcoef_stencil[1])) {
|
||||||
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
|
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
|
||||||
// Just put the fog far away at a large finite distance.
|
// Just put the fog far away at a large finite distance.
|
||||||
// Infinities and NaNs are rather unpredictable in shaders on many GPUs
|
// Infinities and NaNs are rather unpredictable in shaders on many GPUs
|
||||||
// so it's best to just make it a sane calculation.
|
// so it's best to just make it a sane calculation.
|
||||||
fogcoef[0] = 100000.0f;
|
fogcoef_stencil[0] = 100000.0f;
|
||||||
fogcoef[1] = 1.0f;
|
fogcoef_stencil[1] = 1.0f;
|
||||||
}
|
}
|
||||||
#ifndef MOBILE_DEVICE
|
#ifndef MOBILE_DEVICE
|
||||||
else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) {
|
else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
|
||||||
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]);
|
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
CopyFloat2(ub_base.fogCoef, fogcoef);
|
CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Texturing
|
// Texturing
|
||||||
|
@ -82,6 +82,8 @@ enum {
|
|||||||
};
|
};
|
||||||
|
|
||||||
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
|
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
|
||||||
|
// This is just a bit too big to fit in 512 bytes...
|
||||||
|
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
|
||||||
struct UB_VS_FS_Base {
|
struct UB_VS_FS_Base {
|
||||||
float proj[16];
|
float proj[16];
|
||||||
float proj_through[16];
|
float proj_through[16];
|
||||||
@ -90,14 +92,13 @@ struct UB_VS_FS_Base {
|
|||||||
float tex[16]; // not that common, may want to break out
|
float tex[16]; // not that common, may want to break out
|
||||||
float uvScaleOffset[4];
|
float uvScaleOffset[4];
|
||||||
float depthRange[4];
|
float depthRange[4];
|
||||||
float fogCoef[4];
|
float fogCoef_stencil[4];
|
||||||
float matAmbient[4];
|
float matAmbient[4];
|
||||||
// Fragment data
|
// Fragment data
|
||||||
float fogColor[4];
|
float fogColor[4];
|
||||||
float texEnvColor[4];
|
float texEnvColor[4];
|
||||||
int alphaColorRef[4];
|
int alphaColorRef[4];
|
||||||
int colorTestMask[4];
|
int colorTestMask[4];
|
||||||
float stencilReplace[4]; // only first float used
|
|
||||||
float blendFixA[4];
|
float blendFixA[4];
|
||||||
float blendFixB[4];
|
float blendFixB[4];
|
||||||
float texClamp[4];
|
float texClamp[4];
|
||||||
@ -112,19 +113,19 @@ R"( mat4 proj_mtx;
|
|||||||
mat4 tex_mtx;
|
mat4 tex_mtx;
|
||||||
vec4 uvscaleoffset;
|
vec4 uvscaleoffset;
|
||||||
vec4 depthRange;
|
vec4 depthRange;
|
||||||
vec2 fogcoef;
|
vec3 fogcoef_stencilreplace;
|
||||||
vec4 matambientalpha;
|
vec4 matambientalpha;
|
||||||
vec3 fogcolor;
|
vec3 fogcolor;
|
||||||
vec3 texenv;
|
vec3 texenv;
|
||||||
ivec4 alphacolorref;
|
ivec4 alphacolorref;
|
||||||
ivec4 alphacolormask;
|
ivec4 alphacolormask;
|
||||||
float stencilReplaceValue;
|
|
||||||
vec3 blendFixA;
|
vec3 blendFixA;
|
||||||
vec3 blendFixB;
|
vec3 blendFixB;
|
||||||
vec4 texclamp;
|
vec4 texclamp;
|
||||||
vec2 texclampoff;
|
vec2 texclampoff;
|
||||||
)";
|
)";
|
||||||
|
|
||||||
|
// 576 bytes. Can we get down to 512?
|
||||||
struct UB_VS_Lights {
|
struct UB_VS_Lights {
|
||||||
float ambientColor[4];
|
float ambientColor[4];
|
||||||
float materialDiffuse[4];
|
float materialDiffuse[4];
|
||||||
|
@ -505,7 +505,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
|
|||||||
|
|
||||||
// Compute fogdepth
|
// Compute fogdepth
|
||||||
if (enableFog)
|
if (enableFog)
|
||||||
WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n");
|
WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n");
|
||||||
}
|
}
|
||||||
WRITE(p, "}\n");
|
WRITE(p, "}\n");
|
||||||
return true;
|
return true;
|
||||||
|
Loading…
Reference in New Issue
Block a user