From 03f2c9648dba5b7919d0c34e8381a02f84851695 Mon Sep 17 00:00:00 2001 From: Scott Mansell Date: Fri, 15 Jan 2016 17:51:54 +1300 Subject: [PATCH] Shader UID change: Only store the two bits of components we need. This frees up 21 bits and allows us to shorten the UID struct by an entire 32 bits. It's not strictly needed (as it's encoded into the length) but I added a bit for per-pixel lighting to make my life easier in the following commits. --- Source/Core/VideoCommon/NativeVertexFormat.h | 1 + Source/Core/VideoCommon/PixelShaderGen.cpp | 10 +++++++--- Source/Core/VideoCommon/PixelShaderGen.h | 11 ++++------- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/Source/Core/VideoCommon/NativeVertexFormat.h b/Source/Core/VideoCommon/NativeVertexFormat.h index b5d40bd54e..55f8a57483 100644 --- a/Source/Core/VideoCommon/NativeVertexFormat.h +++ b/Source/Core/VideoCommon/NativeVertexFormat.h @@ -31,6 +31,7 @@ enum VB_HAS_NRM2 = (1 << 12), VB_HAS_NRMALL = (7 << 10), + VB_COL_SHIFT = 13, VB_HAS_COL0 = (1 << 13), VB_HAS_COL1 = (1 << 14), diff --git a/Source/Core/VideoCommon/PixelShaderGen.cpp b/Source/Core/VideoCommon/PixelShaderGen.cpp index 052bae7427..80e84a59f0 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.cpp +++ b/Source/Core/VideoCommon/PixelShaderGen.cpp @@ -169,7 +169,6 @@ template static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) { T out; - const u32 components = VertexLoaderManager::g_current_components; // Non-uid template parameters will write to the dummy data (=> gets optimized out) pixel_shader_uid_data dummy_data; pixel_shader_uid_data* uid_data = out.template GetUidData(); @@ -458,13 +457,18 @@ static T GeneratePixelShader(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType) "\tfloat3 ldir, h, cosAttn, distAttn;\n" "\tfloat dist, dist2, attn;\n"); + // The lighting shader only needs the two color bits of the 23bit component bit array. 
+ uid_data->components = + (VertexLoaderManager::g_current_components & (VB_HAS_COL0 | VB_HAS_COL1)) >> VB_COL_SHIFT; + ; + // TODO: Our current constant usage code isn't able to handle more than one buffer. // So we can't mark the VS constant as used here. But keep them here as reference. // out.SetConstantsUsed(C_PLIGHT_COLORS, C_PLIGHT_COLORS+7); // TODO: Can be optimized further // out.SetConstantsUsed(C_PLIGHTS, C_PLIGHTS+31); // TODO: Can be optimized further // out.SetConstantsUsed(C_PMATERIALS, C_PMATERIALS+3); - uid_data->components = components; - GenerateLightingShader(out, uid_data->lighting, components, "colors_", "col"); + GenerateLightingShader(out, uid_data->lighting, uid_data->components << VB_COL_SHIFT, + "colors_", "col"); } // HACK to handle cases where the tex gen is not enabled diff --git a/Source/Core/VideoCommon/PixelShaderGen.h b/Source/Core/VideoCommon/PixelShaderGen.h index 8a62b6bb04..9f019202fe 100644 --- a/Source/Core/VideoCommon/PixelShaderGen.h +++ b/Source/Core/VideoCommon/PixelShaderGen.h @@ -24,12 +24,11 @@ struct pixel_shader_uid_data u32 num_values; // TODO: Shouldn't be a u32 u32 NumValues() const { return num_values; } - u32 components : 23; + u32 components : 2; u32 dstAlphaMode : 2; u32 Pretest : 2; u32 nIndirectStagesUsed : 4; u32 stereo : 1; - u32 genMode_numtexgens : 4; u32 genMode_numtevstages : 4; u32 genMode_numindstages : 3; @@ -38,20 +37,20 @@ struct pixel_shader_uid_data u32 alpha_test_logic : 2; u32 alpha_test_use_zcomploc_hack : 1; u32 fog_proj : 1; + u32 fog_fsel : 3; u32 fog_RangeBaseEnabled : 1; u32 ztex_op : 2; u32 fast_depth_calc : 1; u32 per_pixel_depth : 1; + u32 per_pixel_lighting : 1; u32 forced_early_z : 1; u32 early_ztest : 1; u32 bounding_box : 1; - - // TODO: 29 bits of padding is a waste. Can we free up some bits elseware? 
u32 zfreeze : 1; u32 msaa : 1; u32 ssaa : 1; - u32 pad : 29; + u32 pad : 17; u32 texMtxInfo_n_projection : 8; // 8x1 bit u32 tevindref_bi0 : 3; @@ -136,8 +135,6 @@ struct pixel_shader_uid_data u32 pad3 : 14; } stagehash[16]; - // TODO: I think we're fine without an enablePixelLighting field, should probably double check, - // though.. LightingUidData lighting; }; #pragma pack()