Combined two uniforms to get the base UBO down to 512 bytes, so that no space is wasted (NVIDIA needs 256-byte UBO alignment)

This commit is contained in:
Henrik Rydgard 2016-03-20 20:53:46 +01:00
parent c33c3cf3d4
commit 827481d41d
4 changed files with 24 additions and 19 deletions

View File

@ -398,7 +398,7 @@ bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) {
if (stencilToAlpha != REPLACE_ALPHA_NO) {
switch (replaceAlphaWithStencilType) {
case STENCIL_VALUE_UNIFORM:
replacedAlpha = "base.stencilReplaceValue";
replacedAlpha = "base.fogcoef_stencilreplace.z";
break;
case STENCIL_VALUE_ZERO:

View File

@ -169,6 +169,10 @@ ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
memset(&ub_base, 0, sizeof(ub_base));
memset(&ub_lights, 0, sizeof(ub_lights));
memset(&ub_bones, 0, sizeof(ub_bones));
ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base));
ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights));
ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones));
}
ShaderManagerVulkan::~ShaderManagerVulkan() {
@ -202,9 +206,6 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
}
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
Uint8x1ToFloat4(ub_base.stencilReplace, gstate.getStencilTestRef());
}
if (dirtyUniforms & DIRTY_SHADERBLEND) {
Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
@ -270,28 +271,31 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
if (dirtyUniforms & DIRTY_TEXMATRIX) {
ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
}
if (dirtyUniforms & DIRTY_FOGCOEF) {
float fogcoef[2] = {
// Combined two small uniforms
if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
float fogcoef_stencil[3] = {
getFloat24(gstate.fog1),
getFloat24(gstate.fog2),
(float)gstate.getStencilTestRef()
};
if (my_isinf(fogcoef[1])) {
if (my_isinf(fogcoef_stencil[1])) {
// not really sure what a sensible value might be.
fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f;
} else if (my_isnan(fogcoef[1])) {
fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
} else if (my_isnan(fogcoef_stencil[1])) {
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
// Just put the fog far away at a large finite distance.
// Infinities and NaNs are rather unpredictable in shaders on many GPUs
// so it's best to just make it a sane calculation.
fogcoef[0] = 100000.0f;
fogcoef[1] = 1.0f;
fogcoef_stencil[0] = 100000.0f;
fogcoef_stencil[1] = 1.0f;
}
#ifndef MOBILE_DEVICE
else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) {
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]);
else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
}
#endif
CopyFloat2(ub_base.fogCoef, fogcoef);
CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
}
// Texturing

View File

@ -82,6 +82,8 @@ enum {
};
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
// This is just a bit too big to fit in 512 bytes...
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
struct UB_VS_FS_Base {
float proj[16];
float proj_through[16];
@ -90,14 +92,13 @@ struct UB_VS_FS_Base {
float tex[16]; // not that common, may want to break out
float uvScaleOffset[4];
float depthRange[4];
float fogCoef[4];
float fogCoef_stencil[4];
float matAmbient[4];
// Fragment data
float fogColor[4];
float texEnvColor[4];
int alphaColorRef[4];
int colorTestMask[4];
float stencilReplace[4]; // only first float used
float blendFixA[4];
float blendFixB[4];
float texClamp[4];
@ -112,19 +113,19 @@ R"( mat4 proj_mtx;
mat4 tex_mtx;
vec4 uvscaleoffset;
vec4 depthRange;
vec2 fogcoef;
vec3 fogcoef_stencilreplace;
vec4 matambientalpha;
vec3 fogcolor;
vec3 texenv;
ivec4 alphacolorref;
ivec4 alphacolormask;
float stencilReplaceValue;
vec3 blendFixA;
vec3 blendFixB;
vec4 texclamp;
vec2 texclampoff;
)";
// 576 bytes. Can we get down to 512?
struct UB_VS_Lights {
float ambientColor[4];
float materialDiffuse[4];

View File

@ -505,7 +505,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
// Compute fogdepth
if (enableFog)
WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n");
WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n");
}
WRITE(p, "}\n");
return true;