Combined two uniforms to get the base UBO down to 512 bytes, in order not to waste space (NVIDIA needs 256-byte alignment)

This commit is contained in:
Henrik Rydgard 2016-03-20 20:53:46 +01:00
parent c33c3cf3d4
commit 827481d41d
4 changed files with 24 additions and 19 deletions

View File

@ -398,7 +398,7 @@ bool GenerateVulkanGLSLFragmentShader(const ShaderID &id, char *buffer) {
if (stencilToAlpha != REPLACE_ALPHA_NO) { if (stencilToAlpha != REPLACE_ALPHA_NO) {
switch (replaceAlphaWithStencilType) { switch (replaceAlphaWithStencilType) {
case STENCIL_VALUE_UNIFORM: case STENCIL_VALUE_UNIFORM:
replacedAlpha = "base.stencilReplaceValue"; replacedAlpha = "base.fogcoef_stencilreplace.z";
break; break;
case STENCIL_VALUE_ZERO: case STENCIL_VALUE_ZERO:

View File

@ -169,6 +169,10 @@ ShaderManagerVulkan::ShaderManagerVulkan(VulkanContext *vulkan)
memset(&ub_base, 0, sizeof(ub_base)); memset(&ub_base, 0, sizeof(ub_base));
memset(&ub_lights, 0, sizeof(ub_lights)); memset(&ub_lights, 0, sizeof(ub_lights));
memset(&ub_bones, 0, sizeof(ub_bones)); memset(&ub_bones, 0, sizeof(ub_bones));
ILOG("sizeof(ub_base): %d", (int)sizeof(ub_base));
ILOG("sizeof(ub_lights): %d", (int)sizeof(ub_lights));
ILOG("sizeof(ub_bones): %d", (int)sizeof(ub_bones));
} }
ShaderManagerVulkan::~ShaderManagerVulkan() { ShaderManagerVulkan::~ShaderManagerVulkan() {
@ -202,9 +206,6 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
if (dirtyUniforms & DIRTY_FOGCOLOR) { if (dirtyUniforms & DIRTY_FOGCOLOR) {
Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor); Uint8x3ToFloat4(ub_base.fogColor, gstate.fogcolor);
} }
if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {
Uint8x1ToFloat4(ub_base.stencilReplace, gstate.getStencilTestRef());
}
if (dirtyUniforms & DIRTY_SHADERBLEND) { if (dirtyUniforms & DIRTY_SHADERBLEND) {
Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA()); Uint8x3ToFloat4(ub_base.blendFixA, gstate.getFixA());
Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB()); Uint8x3ToFloat4(ub_base.blendFixB, gstate.getFixB());
@ -270,28 +271,31 @@ void ShaderManagerVulkan::BaseUpdateUniforms(int dirtyUniforms) {
if (dirtyUniforms & DIRTY_TEXMATRIX) { if (dirtyUniforms & DIRTY_TEXMATRIX) {
ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix); ConvertMatrix4x3To4x4(ub_base.tex, gstate.tgenMatrix);
} }
if (dirtyUniforms & DIRTY_FOGCOEF) {
float fogcoef[2] = { // Combined two small uniforms
if (dirtyUniforms & (DIRTY_FOGCOEF | DIRTY_STENCILREPLACEVALUE)) {
float fogcoef_stencil[3] = {
getFloat24(gstate.fog1), getFloat24(gstate.fog1),
getFloat24(gstate.fog2), getFloat24(gstate.fog2),
(float)gstate.getStencilTestRef()
}; };
if (my_isinf(fogcoef[1])) { if (my_isinf(fogcoef_stencil[1])) {
// not really sure what a sensible value might be. // not really sure what a sensible value might be.
fogcoef[1] = fogcoef[1] < 0.0f ? -10000.0f : 10000.0f; fogcoef_stencil[1] = fogcoef_stencil[1] < 0.0f ? -10000.0f : 10000.0f;
} else if (my_isnan(fogcoef[1])) { } else if (my_isnan(fogcoef_stencil[1])) {
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988 // Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
// Just put the fog far away at a large finite distance. // Just put the fog far away at a large finite distance.
// Infinities and NaNs are rather unpredictable in shaders on many GPUs // Infinities and NaNs are rather unpredictable in shaders on many GPUs
// so it's best to just make it a sane calculation. // so it's best to just make it a sane calculation.
fogcoef[0] = 100000.0f; fogcoef_stencil[0] = 100000.0f;
fogcoef[1] = 1.0f; fogcoef_stencil[1] = 1.0f;
} }
#ifndef MOBILE_DEVICE #ifndef MOBILE_DEVICE
else if (my_isnanorinf(fogcoef[1]) || my_isnanorinf(fogcoef[0])) { else if (my_isnanorinf(fogcoef_stencil[1]) || my_isnanorinf(fogcoef_stencil[0])) {
ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef[0], fogcoef[1]); ERROR_LOG_REPORT_ONCE(fognan, G3D, "Unhandled fog NaN/INF combo: %f %f", fogcoef_stencil[0], fogcoef_stencil[1]);
} }
#endif #endif
CopyFloat2(ub_base.fogCoef, fogcoef); CopyFloat3(ub_base.fogCoef_stencil, fogcoef_stencil);
} }
// Texturing // Texturing

View File

@ -82,6 +82,8 @@ enum {
}; };
// TODO: Split into two structs, one for software transform and one for hardware transform, to save space. // TODO: Split into two structs, one for software transform and one for hardware transform, to save space.
// This is just a bit too big to fit in 512 bytes...
// 512 bytes. Probably can't get to 256 (nVidia's UBO alignment).
struct UB_VS_FS_Base { struct UB_VS_FS_Base {
float proj[16]; float proj[16];
float proj_through[16]; float proj_through[16];
@ -90,14 +92,13 @@ struct UB_VS_FS_Base {
float tex[16]; // not that common, may want to break out float tex[16]; // not that common, may want to break out
float uvScaleOffset[4]; float uvScaleOffset[4];
float depthRange[4]; float depthRange[4];
float fogCoef[4]; float fogCoef_stencil[4];
float matAmbient[4]; float matAmbient[4];
// Fragment data // Fragment data
float fogColor[4]; float fogColor[4];
float texEnvColor[4]; float texEnvColor[4];
int alphaColorRef[4]; int alphaColorRef[4];
int colorTestMask[4]; int colorTestMask[4];
float stencilReplace[4]; // only first float used
float blendFixA[4]; float blendFixA[4];
float blendFixB[4]; float blendFixB[4];
float texClamp[4]; float texClamp[4];
@ -112,19 +113,19 @@ R"( mat4 proj_mtx;
mat4 tex_mtx; mat4 tex_mtx;
vec4 uvscaleoffset; vec4 uvscaleoffset;
vec4 depthRange; vec4 depthRange;
vec2 fogcoef; vec3 fogcoef_stencilreplace;
vec4 matambientalpha; vec4 matambientalpha;
vec3 fogcolor; vec3 fogcolor;
vec3 texenv; vec3 texenv;
ivec4 alphacolorref; ivec4 alphacolorref;
ivec4 alphacolormask; ivec4 alphacolormask;
float stencilReplaceValue;
vec3 blendFixA; vec3 blendFixA;
vec3 blendFixB; vec3 blendFixB;
vec4 texclamp; vec4 texclamp;
vec2 texclampoff; vec2 texclampoff;
)"; )";
// 576 bytes. Can we get down to 512?
struct UB_VS_Lights { struct UB_VS_Lights {
float ambientColor[4]; float ambientColor[4];
float materialDiffuse[4]; float materialDiffuse[4];

View File

@ -505,7 +505,7 @@ bool GenerateVulkanGLSLVertexShader(const ShaderID &id, char *buffer, bool *uses
// Compute fogdepth // Compute fogdepth
if (enableFog) if (enableFog)
WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef.x) * base.fogcoef.y;\n"); WRITE(p, " v_fogdepth = (viewPos.z + base.fogcoef_stencilreplace.x) * base.fogcoef_stencilreplace.y;\n");
} }
WRITE(p, "}\n"); WRITE(p, "}\n");
return true; return true;