Merge branch 'shader-uids-awesome'.

Replaces the old, hardcoded shader ID generator with a semi-automatic mechanism that generates IDs from hints in the code generator.

Also introduces a flexible framework to do all kinds of funky stuff with the shader code generation logic. As an example, a uniform usage profile generation class is added (unused for now, though).

Functionality can still be tested by setting the EnableShaderDebugging field in the gfx config to True. Whenever two different shaders are identified with the same ID, they will be written to a file and an error message will be written to the Dolphin log.
This commit is contained in:
NeoBrainX 2013-06-17 13:27:22 +02:00
commit 88bc8255b8
28 changed files with 1630 additions and 1401 deletions

View File

@ -144,6 +144,11 @@
#define TEVALPHAARG_KONST 6
#define TEVALPHAARG_ZERO 7
#define GX_TEVPREV 0
#define GX_TEVREG0 1
#define GX_TEVREG1 2
#define GX_TEVREG2 3
#define ALPHACMP_NEVER 0
#define ALPHACMP_LESS 1
#define ALPHACMP_EQUAL 2
@ -371,7 +376,7 @@ struct TevStageCombiner
union TwoTevStageOrders
{
struct
struct
{
u32 texmap0 : 3; // indirect tex stage texmap
u32 texcoord0 : 3;

View File

@ -89,21 +89,21 @@ void GFXDebuggerBase::DumpPixelShader(const char* path)
if (!useDstAlpha)
{
output = "Destination alpha disabled:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
if(g_ActiveConfig.backend_info.bSupportsDualSourceBlend)
{
output = "Using dual source blending for destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
/// output += GeneratePixelShaderCode(DSTALPHA_DUAL_SOURCE_BLEND, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
else
{
output = "Using two passes for emulating destination alpha:\n";
output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
/// output += GeneratePixelShaderCode(DSTALPHA_NONE, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
output += "\n\nDestination alpha pass shader:\n";
output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
/// output += GeneratePixelShaderCode(DSTALPHA_ALPHA_PASS, g_ActiveConfig.backend_info.APIType, g_nativeVertexFmt->m_components);
}
}
@ -117,7 +117,7 @@ void GFXDebuggerBase::DumpVertexShader(const char* path)
sprintf(filename, "%sdump_vs.txt", path);
File::CreateEmptyFile(filename);
File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename);
/// File::WriteStringToFile(true, GenerateVertexShaderCode(g_nativeVertexFmt->m_components, g_ActiveConfig.backend_info.APIType), filename);
}
void GFXDebuggerBase::DumpPixelShaderConstants(const char* path)

View File

@ -5,217 +5,3 @@
#include "LightingShaderGen.h"
#include "NativeVertexFormat.h"
#include "XFMemory.h"
#define WRITE p+=sprintf
// Packs the lighting state of every active color channel into out[] and
// returns the number of entries written (xfregs.numChan.numColorChans).
//
// Each entry holds, for the color channel, its raw register value (hex) when
// lighting is enabled and only its matsource otherwise; the same selection for
// the matching alpha channel is OR-ed in shifted left by 15 bits.
//
// out must provide one u32 per color channel; the assertion below limits the
// channel count to two.
int GetLightingShaderId(u32* out)
{
	const u32 num_channels = xfregs.numChan.numColorChans;

	// Validate the channel count before out[] is indexed with it, so that an
	// out-of-range value is reported before anything is written.
	_assert_(num_channels <= 2);

	for (u32 i = 0; i < num_channels; ++i)
	{
		const u32 color_bits = xfregs.color[i].enablelighting ?
			(u32)xfregs.color[i].hex :
			(u32)xfregs.color[i].matsource;
		const u32 alpha_bits = xfregs.alpha[i].enablelighting ?
			(u32)xfregs.alpha[i].hex :
			(u32)xfregs.alpha[i].matsource;

		out[i] = color_bits | (alpha_bits << 15);
	}

	return (int)num_channels;
}
// Appends the shader statements that add one light's contribution to the
// accumulator "lacc" and returns the advanced write cursor.
//   p          - write cursor into the shader text buffer
//   index      - light index; entry index*5 (+1..+4) of lightsName is addressed
//   chan       - channel whose attnfunc/diffusefunc select the emitted code
//   lightsName - name of the lights array in the generated shader
//   coloralpha - 1 if color ("xyz"), 2 if alpha ("w"), anything else "xyzw"
char *GenerateLightShader(char *p, int index, const LitChannel& chan, const char* lightsName, int coloralpha)
{
	const char* swizzle;
	switch (coloralpha)
	{
	case 1:  swizzle = "xyz";  break;
	case 2:  swizzle = "w";    break;
	default: swizzle = "xyzw"; break;
	}

	// first array entry belonging to this light
	const int base = index * 5;

	// LIGHTDIF_SIGN keeps the signed dot product, the other diffuse mode clamps it at zero
	const char* const dot_prefix = (chan.diffusefunc != LIGHTDIF_SIGN) ? "max(0.0f," : "(";

	const bool uses_attenuation = (chan.attnfunc & 1) != 0;

	if (uses_attenuation)
	{
		// spec and spot: compute "attn" first, then scale the light by it
		if (chan.attnfunc == 3)
		{
			// spot
			WRITE(p, "ldir = %s[%d + 3].xyz - pos.xyz;\n", lightsName, base);
			WRITE(p, "dist2 = dot(ldir, ldir);\n"
				"dist = sqrt(dist2);\n"
				"ldir = ldir / dist;\n"
				"attn = max(0.0f, dot(ldir, %s[%d + 4].xyz));\n", lightsName, base);
			WRITE(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1.0f, attn, attn*attn))) / dot(%s[%d + 2].xyz, float3(1.0f,dist,dist2));\n", lightsName, base, lightsName, base);
		}
		else if (chan.attnfunc == 1)
		{
			// specular
			WRITE(p, "ldir = normalize(%s[%d + 3].xyz);\n", lightsName, base);
			WRITE(p, "attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s[%d + 4].xyz)) : 0.0f;\n", lightsName, base);
			WRITE(p, "attn = max(0.0f, dot(%s[%d + 1].xyz, float3(1,attn,attn*attn))) / dot(%s[%d + 2].xyz, float3(1,attn,attn*attn));\n", lightsName, base, lightsName, base);
		}

		if (chan.diffusefunc == LIGHTDIF_NONE)
		{
			WRITE(p, "lacc.%s += attn * %s[%d].%s;\n", swizzle, lightsName, base, swizzle);
		}
		else if (chan.diffusefunc == LIGHTDIF_SIGN || chan.diffusefunc == LIGHTDIF_CLAMP)
		{
			WRITE(p, "lacc.%s += attn * %sdot(ldir, _norm0)) * %s[%d].%s;\n",
				swizzle, dot_prefix, lightsName, base, swizzle);
		}
		else
		{
			_assert_(0);
		}
	}
	else
	{
		// attenuation disabled
		if (chan.diffusefunc == LIGHTDIF_NONE)
		{
			WRITE(p, "lacc.%s += %s[%d].%s;\n", swizzle, lightsName, base, swizzle);
		}
		else if (chan.diffusefunc == LIGHTDIF_SIGN || chan.diffusefunc == LIGHTDIF_CLAMP)
		{
			WRITE(p, "ldir = normalize(%s[%d + 3].xyz - pos.xyz);\n", lightsName, base);
			WRITE(p, "lacc.%s += %sdot(ldir, _norm0)) * %s[%d].%s;\n",
				swizzle, dot_prefix, lightsName, base, swizzle);
		}
		else
		{
			_assert_(0);
		}
	}

	WRITE(p, "\n");
	return p;
}
// Emits the per-channel lighting code used for both vertex and pixel shaders.
// For every active color channel j (xfregs.numChan.numColorChans of them) one
// braced block is written that selects the material color "mat", seeds the
// light accumulator "lacc", adds every enabled light via GenerateLightShader
// and finally stores "<dest><j> = mat * clamp(lacc, 0.0, 1.0)".
//   p             - write cursor into the shader text buffer; the advanced cursor is returned
//   components    - vertex component flags, tested against VB_HAS_COL0 << j
//   materialsName - name of the materials array (I_MATERIALS in vs, I_PMATERIALS in ps)
//   lightsName    - name of the lights array, forwarded to GenerateLightShader
//   inColorName   - prefix of the input colors (color in vs, colors_ in ps)
//   dest          - prefix of the output colors (o.colors_ in vs, colors_ in ps)
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++)
{
const LitChannel& color = xfregs.color[j];
const LitChannel& alpha = xfregs.alpha[j];
WRITE(p, "{\n");
// material color: vertex color j, else vertex color 0, else opaque white
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
WRITE(p, "mat = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0)
WRITE(p, "mat = %s0;\n", inColorName);
else
WRITE(p, "mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
else // from color
{
// material entries are read at j+2; entries j are used as the ambient source below
WRITE(p, "mat = %s[%d];\n", materialsName, j+2);
}
if (color.enablelighting)
{
// the accumulator starts out as the ambient term
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
WRITE(p, "lacc = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
WRITE(p, "lacc = %s0;\n", inColorName);
else
WRITE(p, "lacc = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
}
else // from color
{
WRITE(p, "lacc = %s[%d];\n", materialsName, j);
}
}
else
{
// lighting disabled: an accumulator of 1 makes the final multiply yield mat unchanged
WRITE(p, "lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// check if alpha is different
// (mat.w only needs overriding when the alpha channel uses another material source)
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
WRITE(p, "mat.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0)
WRITE(p, "mat.w = %s0.w;\n", inColorName);
else WRITE(p, "mat.w = 1.0f;\n");
}
else // from color
{
WRITE(p, "mat.w = %s[%d].w;\n", materialsName, j+2);
}
}
// the alpha component of the accumulator is always written, mirroring the color logic above
if (alpha.enablelighting)
{
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
WRITE(p, "lacc.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
WRITE(p, "lacc.w = %s0.w;\n", inColorName);
else
WRITE(p, "lacc.w = 0.0f;\n");
}
else // from color
{
WRITE(p, "lacc.w = %s[%d].w;\n", materialsName, j);
}
}
else
{
WRITE(p, "lacc.w = 1.0f;\n");
}
if(color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
// mask stays 0 unless both channels have identical lightparams
int mask = 0;
if(color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if(mask)
{
// shared lights are emitted once for all four components (coloralpha 3 selects "xyzw")
for (int i = 0; i < 8; ++i)
{
if (mask & (1<<i))
p = GenerateLightShader(p, i, color, lightsName, 3);
}
}
}
// no shared lights
// every light not covered by mask is emitted separately for color (1) and alpha (2)
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
p = GenerateLightShader(p, i, color, lightsName, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
p = GenerateLightShader(p, i, alpha, lightsName, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
int coloralpha = color.enablelighting ? 1 : 2;
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
p = GenerateLightShader(p, i, workingchannel, lightsName, coloralpha);
}
}
// final channel color: material modulated by the clamped light accumulator
WRITE(p, "%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
WRITE(p, "}\n");
}
return p;
}

View File

@ -5,9 +5,255 @@
#ifndef _LIGHTINGSHADERGEN_H_
#define _LIGHTINGSHADERGEN_H_
#include "CommonTypes.h"
#include "ShaderGenCommon.h"
#include "NativeVertexFormat.h"
#include "XFMemory.h"
int GetLightingShaderId(u32* out);
char *GenerateLightingShader(char *p, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest);
// Builds the shader expression naming the color entry of light `index`:
// "<lightsName>[5*<index>].<swizzle>".
// The text is written into a function-local static buffer of 32 bytes, so it
// is truncated to 31 characters and overwritten by the next call to LightCol.
static const char* LightCol(const char* lightsName, unsigned int index, const char* swizzle)
{
	static char result[32];
	// index is unsigned, so it is formatted with %u rather than %d
	snprintf(result, sizeof(result), "%s[5*%u].%s", lightsName, index, swizzle);
	return result;
}
// Builds the shader expression naming entry 5*index+1 of the lights array
// ("<lightsName>[5*<index>+1]"), which the generated code uses as the
// cosine attenuation factors.
// The text is written into a function-local static buffer of 32 bytes, so it
// is truncated to 31 characters and overwritten by the next call to LightCosAtt.
static const char* LightCosAtt(const char* lightsName, unsigned int index)
{
	static char result[32];
	// index is unsigned, so it is formatted with %u rather than %d
	snprintf(result, sizeof(result), "%s[5*%u+1]", lightsName, index);
	return result;
}
// Builds the shader expression naming entry 5*index+2 of the lights array
// ("<lightsName>[5*<index>+2]"), which the generated code uses as the
// distance attenuation factors.
// The text is written into a function-local static buffer of 32 bytes, so it
// is truncated to 31 characters and overwritten by the next call to LightDistAtt.
static const char* LightDistAtt(const char* lightsName, unsigned int index)
{
	static char result[32];
	// index is unsigned, so it is formatted with %u rather than %d
	snprintf(result, sizeof(result), "%s[5*%u+2]", lightsName, index);
	return result;
}
// Builds the shader expression naming entry 5*index+3 of the lights array
// ("<lightsName>[5*<index>+3]"), which the generated code uses as the
// light position.
// The text is written into a function-local static buffer of 32 bytes, so it
// is truncated to 31 characters and overwritten by the next call to LightPos.
static const char* LightPos(const char* lightsName, unsigned int index)
{
	static char result[32];
	// index is unsigned, so it is formatted with %u rather than %d
	snprintf(result, sizeof(result), "%s[5*%u+3]", lightsName, index);
	return result;
}
// Builds the shader expression naming entry 5*index+4 of the lights array
// ("<lightsName>[5*<index>+4]"), which the generated code uses as the
// light direction.
// The text is written into a function-local static buffer of 32 bytes, so it
// is truncated to 31 characters and overwritten by the next call to LightDir.
static const char* LightDir(const char* lightsName, unsigned int index)
{
	static char result[32];
	// index is unsigned, so it is formatted with %u rather than %d
	snprintf(result, sizeof(result), "%s[5*%u+4]", lightsName, index);
	return result;
}
// Writes the shader statements that add light `index` to the accumulator
// "lacc" for one lit channel, and records that channel's attnfunc and
// diffusefunc in uid_data.
//   object        - receiver of the generated text; only its Write() member is used
//   uid_data      - attnfunc/diffusefunc get the channel's values OR-ed in,
//                   shifted left by 2*litchan_index
//   index         - light index handed to the Light* name helpers
//   litchan_index - 0 and 1 select xfregs.color[0/1], 2 and 3 select xfregs.alpha[0/1]
//   lightsName    - name of the lights array in the generated shader
//   coloralpha    - 1 for color ("xyz"), 2 for alpha ("w"), anything else "xyzw"
template<class T>
static void GenerateLightShader(T& object, LightingUidData& uid_data, int index, int litchan_index, const char* lightsName, int coloralpha)
{
	const bool is_alpha_channel = litchan_index > 1;
	const LitChannel& chan = is_alpha_channel ? xfregs.alpha[litchan_index-2] : xfregs.color[litchan_index];

	const char* swizzle;
	switch (coloralpha)
	{
	case 1:  swizzle = "xyz";  break;
	case 2:  swizzle = "w";    break;
	default: swizzle = "xyzw"; break;
	}

	const int uid_shift = 2*litchan_index;
	uid_data.attnfunc |= chan.attnfunc << uid_shift;
	uid_data.diffusefunc |= chan.diffusefunc << uid_shift;

	// LIGHTDIF_SIGN keeps the signed dot product, the other diffuse mode clamps it at zero
	const char* const dot_prefix = (chan.diffusefunc != LIGHTDIF_SIGN) ? "max(0.0f," : "(";

	const bool uses_attenuation = (chan.attnfunc & 1) != 0;

	if (uses_attenuation)
	{
		// spec and spot: compute "attn" first, then scale the light by it
		if (chan.attnfunc == 3)
		{
			// spot
			object.Write("ldir = %s.xyz - pos.xyz;\n", LightPos(lightsName, index));
			object.Write("dist2 = dot(ldir, ldir);\n"
				"dist = sqrt(dist2);\n"
				"ldir = ldir / dist;\n"
				"attn = max(0.0f, dot(ldir, %s.xyz));\n", LightDir(lightsName, index));
			object.Write("attn = max(0.0f, dot(%s.xyz, float3(1.0f, attn, attn*attn))) / dot(%s.xyz, float3(1.0f,dist,dist2));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index));
		}
		else if (chan.attnfunc == 1)
		{
			// specular
			object.Write("ldir = normalize(%s.xyz);\n", LightPos(lightsName, index));
			object.Write("attn = (dot(_norm0,ldir) >= 0.0f) ? max(0.0f, dot(_norm0, %s.xyz)) : 0.0f;\n", LightDir(lightsName, index));
			object.Write("attn = max(0.0f, dot(%s.xyz, float3(1,attn,attn*attn))) / dot(%s.xyz, float3(1,attn,attn*attn));\n", LightCosAtt(lightsName, index), LightDistAtt(lightsName, index));
		}

		if (chan.diffusefunc == LIGHTDIF_NONE)
		{
			object.Write("lacc.%s += attn * %s;\n", swizzle, LightCol(lightsName, index, swizzle));
		}
		else if (chan.diffusefunc == LIGHTDIF_SIGN || chan.diffusefunc == LIGHTDIF_CLAMP)
		{
			object.Write("lacc.%s += attn * %sdot(ldir, _norm0)) * %s;\n",
				swizzle, dot_prefix, LightCol(lightsName, index, swizzle));
		}
		else
		{
			_assert_(0);
		}
	}
	else
	{
		// atten disabled
		if (chan.diffusefunc == LIGHTDIF_NONE)
		{
			object.Write("lacc.%s += %s;\n", swizzle, LightCol(lightsName, index, swizzle));
		}
		else if (chan.diffusefunc == LIGHTDIF_SIGN || chan.diffusefunc == LIGHTDIF_CLAMP)
		{
			object.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(lightsName, index));
			object.Write("lacc.%s += %sdot(ldir, _norm0)) * %s;\n",
				swizzle, dot_prefix, LightCol(lightsName, index, swizzle));
		}
		else
		{
			_assert_(0);
		}
	}

	object.Write("\n");
}
// Emits the per-channel lighting code used for both vertex and pixel shaders
// and records every register value that influences the output in uid_data.
// For every active color channel j (xfregs.numChan.numColorChans of them) one
// braced block is written that selects the material color "mat", seeds the
// light accumulator "lacc", adds every enabled light via GenerateLightShader
// and finally stores "<dest><j> = mat * clamp(lacc, 0.0, 1.0)".
//   object        - receiver of the generated text; only its Write() member is used
//   uid_data      - lighting UID fields, filled by OR-ing in shifted register values:
//                   color channel j uses slot j, alpha channel j uses slot j+2
//   components    - vertex component flags, tested against VB_HAS_COL0 << j
//   materialsName - name of the materials array (I_MATERIALS in vs, I_PMATERIALS in ps)
//   lightsName    - name of the lights array, forwarded to GenerateLightShader
//   inColorName   - prefix of the input colors (color in vs, colors_ in ps)
//   dest          - prefix of the output colors (o.colors_ in vs, colors_ in ps)
template<class T>
static void GenerateLightingShader(T& object, LightingUidData& uid_data, int components, const char* materialsName, const char* lightsName, const char* inColorName, const char* dest)
{
for (unsigned int j = 0; j < xfregs.numChan.numColorChans; j++)
{
const LitChannel& color = xfregs.color[j];
const LitChannel& alpha = xfregs.alpha[j];
object.Write("{\n");
uid_data.matsource |= xfregs.color[j].matsource << j;
// material color: vertex color j, else vertex color 0, else opaque white
if (color.matsource) // from vertex
{
if (components & (VB_HAS_COL0 << j))
object.Write("mat = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat = %s0;\n", inColorName);
else
object.Write("mat = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
else // from color
{
// material entries are read at j+2; entries j are used as the ambient source below
object.Write("mat = %s[%d];\n", materialsName, j+2);
}
uid_data.enablelighting |= xfregs.color[j].enablelighting << j;
if (color.enablelighting)
{
// ambsource is only recorded when lighting is enabled, the only case in which it is read
uid_data.ambsource |= xfregs.color[j].ambsource << j;
if (color.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc = %s%d;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc = %s0;\n", inColorName);
else
object.Write("lacc = float4(0.0f, 0.0f, 0.0f, 0.0f);\n");
}
else // from color
{
object.Write("lacc = %s[%d];\n", materialsName, j);
}
}
else
{
// lighting disabled: an accumulator of 1 makes the final multiply yield mat unchanged
object.Write("lacc = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// check if alpha is different
// (mat.w only needs overriding when the alpha channel uses another material source)
uid_data.matsource |= xfregs.alpha[j].matsource << (j+2);
if (alpha.matsource != color.matsource)
{
if (alpha.matsource) // from vertex
{
if (components & (VB_HAS_COL0<<j))
object.Write("mat.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0)
object.Write("mat.w = %s0.w;\n", inColorName);
else object.Write("mat.w = 1.0f;\n");
}
else // from color
{
object.Write("mat.w = %s[%d].w;\n", materialsName, j+2);
}
}
uid_data.enablelighting |= xfregs.alpha[j].enablelighting << (j+2);
// the alpha component of the accumulator is always written, mirroring the color logic above
if (alpha.enablelighting)
{
uid_data.ambsource |= xfregs.alpha[j].ambsource << (j+2);
if (alpha.ambsource) // from vertex
{
if (components & (VB_HAS_COL0<<j) )
object.Write("lacc.w = %s%d.w;\n", inColorName, j);
else if (components & VB_HAS_COL0 )
object.Write("lacc.w = %s0.w;\n", inColorName);
else
object.Write("lacc.w = 0.0f;\n");
}
else // from color
{
object.Write("lacc.w = %s[%d].w;\n", materialsName, j);
}
}
else
{
object.Write("lacc.w = 1.0f;\n");
}
if(color.enablelighting && alpha.enablelighting)
{
// both have lighting, test if they use the same lights
// mask stays 0 unless both channels have identical lightparams
int mask = 0;
// record both channels' functions and light masks: attnfunc/diffusefunc are shifted
// by 2 per lit-channel slot, light_mask by 8 per slot
uid_data.attnfunc |= color.attnfunc << (2*j);
uid_data.attnfunc |= alpha.attnfunc << (2*(j+2));
uid_data.diffusefunc |= color.diffusefunc << (2*j);
uid_data.diffusefunc |= alpha.diffusefunc << (2*(j+2));
uid_data.light_mask |= color.GetFullLightMask() << (8*j);
uid_data.light_mask |= alpha.GetFullLightMask() << (8*(j+2));
if(color.lightparams == alpha.lightparams)
{
mask = color.GetFullLightMask() & alpha.GetFullLightMask();
if(mask)
{
// shared lights are emitted once for all four components (coloralpha 3 selects "xyzw"),
// using the color channel's slot j
for (int i = 0; i < 8; ++i)
{
if (mask & (1<<i))
{
GenerateLightShader<T>(object, uid_data, i, j, lightsName, 3);
}
}
}
}
// no shared lights
// every light not covered by mask is emitted separately for color (slot j) and alpha (slot j+2)
for (int i = 0; i < 8; ++i)
{
if (!(mask&(1<<i)) && (color.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j, lightsName, 1);
if (!(mask&(1<<i)) && (alpha.GetFullLightMask() & (1<<i)))
GenerateLightShader<T>(object, uid_data, i, j+2, lightsName, 2);
}
}
else if (color.enablelighting || alpha.enablelighting)
{
// lights are disabled on one channel so process only the active ones
const LitChannel& workingchannel = color.enablelighting ? color : alpha;
// slot of the active channel: j for color, j+2 for alpha
const int lit_index = color.enablelighting ? j : (j+2);
int coloralpha = color.enablelighting ? 1 : 2;
uid_data.light_mask |= workingchannel.GetFullLightMask() << (8*lit_index);
for (int i = 0; i < 8; ++i)
{
if (workingchannel.GetFullLightMask() & (1<<i))
GenerateLightShader<T>(object, uid_data, i, lit_index, lightsName, coloralpha);
}
}
// final channel color: material modulated by the clamped light accumulator
object.Write("%s%d = mat * clamp(lacc, 0.0, 1.0);\n", dest, j);
object.Write("}\n");
}
}
#endif // _LIGHTINGSHADERGEN_H_

File diff suppressed because it is too large Load Diff

View File

@ -6,6 +6,8 @@
#define GCOGL_PIXELSHADER_H
#include "VideoCommon.h"
#include "ShaderGenCommon.h"
#include "BPMemory.h"
#define I_COLORS "color"
#define I_KCOLORS "k"
@ -31,8 +33,14 @@
#define C_PLIGHTS (C_FOG + 3)
#define C_PMATERIALS (C_PLIGHTS + 40)
#define C_PENVCONST_END (C_PMATERIALS + 4)
#define PIXELSHADERUID_MAX_VALUES 70
#define PIXELSHADERUID_MAX_VALUES_SAFE 116
// Different ways to achieve rendering with destination alpha.
// The enumerators carry no explicit values, so they are numbered 0, 1, 2 in
// declaration order.
enum DSTALPHA_MODE
{
DSTALPHA_NONE, // Render normally, without destination alpha
DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};
// Annoying sure, can be removed once we get up to GLSL ~1.3
const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
@ -47,90 +55,135 @@ const s_svar PSVar_Loc[] = { {I_COLORS, C_COLORS, 4 },
{I_PMATERIALS, C_PMATERIALS, 4 },
};
// DO NOT make anything in this class virtual.
template<bool safe>
class _PIXELSHADERUID
// TODO: Should compact packing be enabled?
//#pragma pack(4)
struct pixel_shader_uid_data
{
public:
u32 values[safe ? PIXELSHADERUID_MAX_VALUES_SAFE : PIXELSHADERUID_MAX_VALUES];
int num_values;
// TODO: Optimize field order for easy access!
_PIXELSHADERUID()
u32 components;
u32 dstAlphaMode : 2;
u32 Pretest : 2;
u32 genMode_numtexgens : 4;
u32 genMode_numtevstages : 4;
u32 genMode_numindstages : 3;
u32 nIndirectStagesUsed : 8;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;
u32 tevindref_bc0 : 3;
u32 tevindref_bi1 : 3;
u32 tevindref_bc1 : 3;
u32 tevindref_bi2 : 3;
u32 tevindref_bc3 : 3;
u32 tevindref_bi4 : 3;
u32 tevindref_bc4 : 3;
inline void SetTevindrefValues(int index, u32 texcoord, u32 texmap)
{
if (index == 0) { tevindref_bc0 = texcoord; tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bc1 = texcoord; tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bc3 = texcoord; tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bc4 = texcoord; tevindref_bi4 = texmap; }
}
inline void SetTevindrefTexmap(int index, u32 texmap)
{
if (index == 0) { tevindref_bi0 = texmap; }
else if (index == 1) { tevindref_bi1 = texmap; }
else if (index == 2) { tevindref_bi2 = texmap; }
else if (index == 3) { tevindref_bi4 = texmap; }
}
_PIXELSHADERUID(const _PIXELSHADERUID& r)
{
num_values = r.num_values;
u64 tevorders_n_texcoord : 48; // 16 x 3 bits
if (safe)
memcpy(values, r.values, PIXELSHADERUID_MAX_VALUES_SAFE);
else
memcpy(values, r.values, r.GetNumValues() * sizeof(values[0]));
u64 tevind_n_sw : 48; // 16 x 3 bits
u64 tevind_n_tw : 48; // 16 x 3 bits
u32 tevind_n_fb_addprev : 16; // 16 x 1 bit
u32 tevind_n_bs : 32; // 16 x 2 bits
u32 tevind_n_fmt : 32; // 16 x 2 bits
u32 tevind_n_bt : 32; // 16 x 2 bits
u64 tevind_n_bias : 48; // 16 x 3 bits
u64 tevind_n_mid : 64; // 16 x 4 bits
// NOTE: These assume that the affected bits are zero before calling
void Set_tevind_sw(int index, u64 val)
{
tevind_n_sw |= val << (3*index);
}
void Set_tevind_tw(int index, u64 val)
{
tevind_n_tw |= val << (3*index);
}
void Set_tevind_bias(int index, u64 val)
{
tevind_n_bias |= val << (3*index);
}
void Set_tevind_mid(int index, u64 val)
{
tevind_n_mid |= val << (4*index);
}
int GetNumValues() const
{
if (safe)
return (sizeof(values) / sizeof(u32));
else
return num_values;
}
u32 tevksel_n_swap1 : 16; // 8x2 bits
u32 tevksel_n_swap2 : 16; // 8x2 bits
u64 tevksel_n_kcsel0 : 40; // 8x5 bits
u64 tevksel_n_kasel0 : 40; // 8x5 bits
u64 tevksel_n_kcsel1 : 40; // 8x5 bits
u64 tevksel_n_kasel1 : 40; // 8x5 bits
void set_tevksel_kcsel(int index, int i, u64 value) { if (i) tevksel_n_kcsel1 |= value << (5*index); else tevksel_n_kcsel0 |= value << (5*index); }
void set_tevksel_kasel(int index, int i, u64 value) { if( i) tevksel_n_kasel1 |= value << (5*index); else tevksel_n_kasel0 |= value << (5*index); }
bool operator <(const _PIXELSHADERUID& _Right) const
{
int N = GetNumValues();
u64 cc_n_d : 64; // 16x4 bits
u64 cc_n_c : 64; // 16x4 bits
u64 cc_n_b : 64; // 16x4 bits
u64 cc_n_a : 64; // 16x4 bits
u32 cc_n_bias : 32; // 16x2 bits
u32 cc_n_op : 16; // 16x1 bit
u32 cc_n_clamp : 16; // 16x1 bit
u32 cc_n_shift : 32; // 16x2 bits
u32 cc_n_dest : 32; // 16x2 bits
if (N < _Right.GetNumValues())
return true;
else if (N > _Right.GetNumValues())
return false;
u32 ac_n_rswap : 32; // 16x2 bits
u32 ac_n_tswap : 32; // 16x2 bits
u64 ac_n_d : 48; // 16x3 bits
u64 ac_n_c : 48; // 16x3 bits
u64 ac_n_b : 48; // 16x3 bits
u64 ac_n_a : 48; // 16x3 bits
u32 ac_n_bias : 32; // 16x2 bits
u32 ac_n_op : 16; // 16x1 bit
u32 ac_n_clamp : 16; // 16x1 bit
u32 ac_n_shift : 32; // 16x2 bits
u32 ac_n_dest : 32; // 16x2 bits
for (int i = 0; i < N; ++i)
{
if (values[i] < _Right.values[i])
return true;
else if (values[i] > _Right.values[i])
return false;
}
u32 alpha_test_comp0 : 3;
u32 alpha_test_comp1 : 3;
u32 alpha_test_logic : 2;
u32 alpha_test_use_zcomploc_hack : 1;
return false;
}
u32 fog_proj : 1;
u32 fog_fsel : 3;
u32 fog_RangeBaseEnabled : 1;
bool operator ==(const _PIXELSHADERUID& _Right) const
{
int N = GetNumValues();
u32 ztex_op : 2;
if (N != _Right.GetNumValues())
return false;
u32 fast_depth_calc : 1;
u32 per_pixel_depth : 1;
u32 bHasIndStage : 16;
for (int i = 0; i < N; ++i)
{
if (values[i] != _Right.values[i])
return false;
}
u32 xfregs_numTexGen_numTexGens : 4;
return true;
}
// TODO: I think we're fine without an enablePixelLighting field, should probably double check, though..
LightingUidData lighting;
};
//#pragma pack()
typedef _PIXELSHADERUID<false> PIXELSHADERUID;
typedef _PIXELSHADERUID<true> PIXELSHADERUIDSAFE;
typedef ShaderUid<pixel_shader_uid_data> PixelShaderUid;
typedef ShaderCode PixelShaderCode; // TODO: Obsolete
typedef ShaderConstantProfile PixelShaderConstantProfile; // TODO: Obsolete
// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
{
DSTALPHA_NONE, // Render normally, without destination alpha
DSTALPHA_ALPHA_PASS, // Render normally first, then render again for alpha
DSTALPHA_DUAL_SOURCE_BLEND // Use dual-source blending
};
const char *GeneratePixelShaderCode(DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderId(PIXELSHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
void GetSafePixelShaderId(PIXELSHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
// Used to make sure that our optimized pixel shader IDs don't lose any possible shader code changes
void ValidatePixelShaderIDs(API_TYPE api, PIXELSHADERUIDSAFE old_id, const std::string& old_code, DSTALPHA_MODE dstAlphaMode, u32 components);
void GeneratePixelShaderCode(PixelShaderCode& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
void GetPixelShaderConstantProfile(PixelShaderConstantProfile& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components);
#endif // GCOGL_PIXELSHADER_H

View File

@ -29,19 +29,45 @@ static u32 lastTexDims[8]; // width | height << 16 | wrap_s << 28 | wrap_t << 30
static u32 lastZBias;
static int nMaterialsChanged;
static float s_constant_cache[C_PENVCONST_END*4];
// Uploads one float4 pixel shader constant through the renderer and mirrors
// its four components into s_constant_cache (four floats per constant).
// The early-out for values that already match the cache is currently disabled:
//	if (s_constant_cache[const_number*4] == f1 && s_constant_cache[const_number*4+1] == f2 &&
//		s_constant_cache[const_number*4+2] == f3 && s_constant_cache[const_number*4+3] == f4)
//		return;
inline void SetPSConstant4f(unsigned int const_number, float f1, float f2, float f3, float f4)
{
	g_renderer->SetPSConstant4f(const_number, f1, f2, f3, f4);

	float* const cached = &s_constant_cache[const_number*4];
	cached[0] = f1;
	cached[1] = f2;
	cached[2] = f3;
	cached[3] = f4;
}
// Uploads one float4 pixel shader constant, given as an array of four floats,
// through the renderer and mirrors it into s_constant_cache.
// The early-out for values that already match the cache is currently disabled:
//	if (s_constant_cache[const_number*4] == f[0] && s_constant_cache[const_number*4+1] == f[1] &&
//		s_constant_cache[const_number*4+2] == f[2] && s_constant_cache[const_number*4+3] == f[3])
//		return;
inline void SetPSConstant4fv(unsigned int const_number, const float *f)
{
	g_renderer->SetPSConstant4fv(const_number, f);

	float* const cached = &s_constant_cache[const_number*4];
	for (unsigned int k = 0; k != 4; ++k)
		cached[k] = f[k];
}
// Uploads `count` consecutive float4 pixel shader constants, starting at
// const_number, through the renderer and mirrors all 4*count floats into
// s_constant_cache.
// The early-out for values that already match the cache is currently disabled:
//	for (unsigned int i = 0; i < 4*count; ++i)
//		if (s_constant_cache[const_number*4+i] != f[i])
//			break;
//		else if (i == 4*count-1)
//			return;
inline void SetMultiPSConstant4fv(unsigned int const_number, unsigned int count, const float *f)
{
	g_renderer->SetMultiPSConstant4fv(const_number, count, f);

	const unsigned int num_floats = 4*count;
	float* const cached = &s_constant_cache[const_number*4];
	for (unsigned int k = 0; k != num_floats; ++k)
		cached[k] = f[k];
}
void PixelShaderManager::Init()
@ -50,6 +76,7 @@ void PixelShaderManager::Init()
memset(lastTexDims, 0, sizeof(lastTexDims));
lastZBias = 0;
memset(lastRGBAfull, 0, sizeof(lastRGBAfull));
memset(s_constant_cache, 0, sizeof(s_constant_cache)); // TODO: Should reflect that on the GPU side....
Dirty();
}
@ -70,11 +97,24 @@ void PixelShaderManager::Shutdown()
}
void PixelShaderManager::SetConstants()
void PixelShaderManager::SetConstants(u32 components)
{
if (g_ActiveConfig.backend_info.APIType == API_OPENGL && !g_ActiveConfig.backend_info.bSupportsGLSLUBO)
Dirty();
// TODO: Probably broken in the non-UBO path
PixelShaderConstantProfile constant_profile(C_PENVCONST_END);
/// TODO: dst alpha/api/components type parameter...
GetPixelShaderConstantProfile(constant_profile, DSTALPHA_DUAL_SOURCE_BLEND, API_OPENGL, components);
static int saved_updates = 0;
static int necessary_updates = 0;
// TODO: Remove this!
#define IncStuff() { \
saved_updates++; \
/*printf("Saved a constant update at line %d! Saved %d against %d now!\n", __LINE__, saved_updates, necessary_updates);*/ }
for (int i = 0; i < 2; ++i)
{
if (s_nColorsChanged[i])
@ -82,30 +122,37 @@ void PixelShaderManager::SetConstants()
int baseind = i ? C_KCOLORS : C_COLORS;
for (int j = 0; j < 4; ++j)
{
if (s_nColorsChanged[i] & (1 << j))
if ((s_nColorsChanged[i] & (1 << j)) && constant_profile.ConstantIsUsed(baseind+j))
{
SetPSConstant4fv(baseind+j, &lastRGBAfull[i][j][0]);
s_nColorsChanged[i] &= ~(1<<j);
++necessary_updates;
} else if ((s_nColorsChanged[i] & (1 << j))) IncStuff();
}
s_nColorsChanged[i] = 0;
}
}
if (s_nTexDimsChanged)
if (s_nTexDimsChanged)
{
for (int i = 0; i < 8; ++i)
{
if (s_nTexDimsChanged & (1<<i))
if ((s_nTexDimsChanged & (1<<i)) && constant_profile.ConstantIsUsed(C_TEXDIMS+i))
{
++necessary_updates;
SetPSTextureDims(i);
}
s_nTexDimsChanged = 0;
}
s_nTexDimsChanged &= ~(1<<i);
}else if (s_nTexDimsChanged & (1<<i)) IncStuff();
}
}
if (s_bAlphaChanged)
if (s_bAlphaChanged && constant_profile.ConstantIsUsed(C_ALPHA))
{
++necessary_updates;
SetPSConstant4f(C_ALPHA, (lastAlpha&0xff)/255.0f, ((lastAlpha>>8)&0xff)/255.0f, 0, ((lastAlpha>>16)&0xff)/255.0f);
s_bAlphaChanged = false;
}
} else if (s_bAlphaChanged) IncStuff();
if (s_bZTextureTypeChanged)
if (s_bZTextureTypeChanged && constant_profile.ConstantIsUsed(C_ZBIAS))
{
float ftemp[4];
switch (bpmem.ztex2.type)
@ -121,13 +168,14 @@ void PixelShaderManager::SetConstants()
case 2:
// 24 bits
ftemp[0] = 16711680.0f/16777215.0f; ftemp[1] = 65280.0f/16777215.0f; ftemp[2] = 255.0f/16777215.0f; ftemp[3] = 0;
break;
}
break;
}
++necessary_updates;
SetPSConstant4fv(C_ZBIAS, ftemp);
s_bZTextureTypeChanged = false;
}
} else if (s_bZTextureTypeChanged) IncStuff();
if (s_bZBiasChanged || s_bDepthRangeChanged)
if ((s_bZBiasChanged || s_bDepthRangeChanged) && constant_profile.ConstantIsUsed(C_ZBIAS+1))
{
// reversed gxsetviewport(xorig, yorig, width, height, nearz, farz)
// [0] = width/2
@ -138,9 +186,10 @@ void PixelShaderManager::SetConstants()
// [5] = 16777215 * farz
//ERROR_LOG("pixel=%x,%x, bias=%x\n", bpmem.zcontrol.pixel_format, bpmem.ztex2.type, lastZBias);
++necessary_updates;
SetPSConstant4f(C_ZBIAS+1, xfregs.viewport.farZ / 16777216.0f, xfregs.viewport.zRange / 16777216.0f, 0, (float)(lastZBias)/16777215.0f);
s_bZBiasChanged = s_bDepthRangeChanged = false;
}
}else if ((s_bZBiasChanged || s_bDepthRangeChanged)) IncStuff();
// indirect incoming texture scales
if (s_nIndTexScaleChanged)
@ -148,45 +197,51 @@ void PixelShaderManager::SetConstants()
// set as two sets of vec4s, each containing S and T of two ind stages.
float f[8];
if (s_nIndTexScaleChanged & 0x03)
if ((s_nIndTexScaleChanged & 0x03) && constant_profile.ConstantIsUsed(C_INDTEXSCALE))
{
for (u32 i = 0; i < 2; ++i)
{
f[2 * i] = bpmem.texscale[0].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
f[2 * i] = bpmem.texscale[0].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[0].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE, f);
}
s_nIndTexScaleChanged &= ~0x03;
}
else if ((s_nIndTexScaleChanged & 0x03)) IncStuff();
if (s_nIndTexScaleChanged & 0x0c)
if ((s_nIndTexScaleChanged & 0x0c) && constant_profile.ConstantIsUsed(C_INDTEXSCALE+1))
{
for (u32 i = 2; i < 4; ++i)
for (u32 i = 2; i < 4; ++i)
{
f[2 * i] = bpmem.texscale[1].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
f[2 * i] = bpmem.texscale[1].getScaleS(i & 1);
f[2 * i + 1] = bpmem.texscale[1].getScaleT(i & 1);
PRIM_LOG("tex indscale%d: %f %f\n", i, f[2 * i], f[2 * i + 1]);
}
++necessary_updates;
SetPSConstant4fv(C_INDTEXSCALE+1, &f[4]);
}
s_nIndTexScaleChanged = 0;
}
s_nIndTexScaleChanged &= ~0x0c;
}
else if ((s_nIndTexScaleChanged & 0x0c)) IncStuff();
}
if (s_nIndTexMtxChanged)
{
for (int i = 0; i < 3; ++i)
{
if (s_nIndTexMtxChanged & (1 << i))
if ((s_nIndTexMtxChanged & (1 << i)) && (constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i) || constant_profile.ConstantIsUsed(C_INDTEXMTX+2*i+1)))
{
int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
((u32)bpmem.indmtx[i].col1.s1 << 2) |
((u32)bpmem.indmtx[i].col2.s2 << 4);
float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
int scale = ((u32)bpmem.indmtx[i].col0.s0 << 0) |
((u32)bpmem.indmtx[i].col1.s1 << 2) |
((u32)bpmem.indmtx[i].col2.s2 << 4);
float fscale = powf(2.0f, (float)(scale - 17)) / 1024.0f;
// xyz - static matrix
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better
// rev 2972 - now using / 256.... verify that this works
// xyz - static matrix
// TODO w - dynamic matrix scale / 256...... somehow / 4 works better
// rev 2972 - now using / 256.... verify that this works
++necessary_updates;
++necessary_updates;
SetPSConstant4f(C_INDTEXMTX + 2 * i,
bpmem.indmtx[i].col0.ma * fscale,
bpmem.indmtx[i].col1.mc * fscale,
@ -198,23 +253,26 @@ void PixelShaderManager::SetConstants()
bpmem.indmtx[i].col2.mf * fscale,
fscale * 4.0f);
PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n",
i, 1024.0f*fscale,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale,
bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);
}
}
s_nIndTexMtxChanged = 0;
}
PRIM_LOG("indmtx%d: scale=%f, mat=(%f %f %f; %f %f %f)\n",
i, 1024.0f*fscale,
bpmem.indmtx[i].col0.ma * fscale, bpmem.indmtx[i].col1.mc * fscale, bpmem.indmtx[i].col2.me * fscale,
bpmem.indmtx[i].col0.mb * fscale, bpmem.indmtx[i].col1.md * fscale, bpmem.indmtx[i].col2.mf * fscale);
if (s_bFogColorChanged)
s_nIndTexMtxChanged &= ~(1 << i);
}else if ((s_nIndTexMtxChanged & (1 << i))) {IncStuff();IncStuff();}
}
}
if (s_bFogColorChanged && constant_profile.ConstantIsUsed(C_FOG))
{
++necessary_updates;
SetPSConstant4f(C_FOG, bpmem.fog.color.r / 255.0f, bpmem.fog.color.g / 255.0f, bpmem.fog.color.b / 255.0f, 0);
s_bFogColorChanged = false;
}
}else if (s_bFogColorChanged) IncStuff();
if (s_bFogParamChanged)
if (s_bFogParamChanged && constant_profile.ConstantIsUsed(C_FOG+1))
{
++necessary_updates;
if(!g_ActiveConfig.bDisableFog)
{
//downscale magnitude to 0.24 bits
@ -226,11 +284,12 @@ void PixelShaderManager::SetConstants()
else
SetPSConstant4f(C_FOG + 1, 0.0, 1.0, 0.0, 1.0);
s_bFogParamChanged = false;
}
s_bFogParamChanged = false;
}else if ( s_bFogParamChanged) IncStuff();
if (s_bFogRangeAdjustChanged)
if (s_bFogRangeAdjustChanged && constant_profile.ConstantIsUsed(C_FOG+2))
{
++necessary_updates;
if(!g_ActiveConfig.bDisableFog && bpmem.fogRange.Base.Enabled == 1)
{
//bpmem.fogRange.Base.Center : center of the viewport in x axis. observation: bpmem.fogRange.Base.Center = realcenter + 342;
@ -251,8 +310,9 @@ void PixelShaderManager::SetConstants()
}
s_bFogRangeAdjustChanged = false;
}
}else if ( s_bFogRangeAdjustChanged) IncStuff();
// TODO: use constant profile here!
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) // config check added because the code in here was crashing for me inside SetPSConstant4f
{
if (nLightsChanged[0] >= 0)
@ -349,8 +409,10 @@ void PixelShaderManager::SetPSTextureDims(int texid)
SetPSConstant4fv(C_TEXDIMS + texid, fdims);
}
// This one is high in profiles (0.5%). TODO: Move conversion out, only store the raw color value
// This one is high in profiles (0.5%).
// TODO: Move conversion out, only store the raw color value
// and update it when the shader constant is set, only.
// TODO: Conversion should be checked in the context of tev_fixes..
void PixelShaderManager::SetColorChanged(int type, int num, bool high)
{
float *pf = &lastRGBAfull[type][num][0];

View File

@ -21,7 +21,7 @@ public:
static void Shutdown();
static void DoState(PointerWrap &p);
static void SetConstants(); // sets pixel shader constants
static void SetConstants(u32 components); // sets pixel shader constants
// constant management, should be called after memory is committed
static void SetColorChanged(int type, int index, bool high);

View File

@ -0,0 +1,286 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
#ifndef _SHADERGENCOMMON_H
#define _SHADERGENCOMMON_H
#include <stdio.h>
#include <stdarg.h>
#include <string>
#include <vector>
#include <algorithm>
#include "CommonTypes.h"
#include "VideoCommon.h"
/**
 * Base class for every object that can be passed through the shader generation path
 * (GenerateVertexShader, GeneratePixelShader).
 *
 * The generators are templates, so the methods below are deliberately NOT virtual: a child class
 * simply hides the methods it cares about and inherits the do-nothing defaults for the rest.
 *  - ShaderCode collects the generated shader source text.
 *  - ShaderUid collects the parameters which uniquely identify a ShaderCode output.
 *  - ShaderConstantProfile records which shader constants a shader uses, which can be used to
 *    optimize buffer management.
 */
class ShaderGeneratorInterface
{
public:
	/**
	 * Called whenever the shader generator emits a piece of shader code. Takes printf-style arguments.
	 * @note The default implementation discards its arguments. ShaderCode appends the formatted
	 *       string to its buffer; other child classes are free to do whatever they like with it.
	 */
	void Write(const char* fmt, ...) {}

	/**
	 * Returns a read pointer to the internal buffer; the default implementation has none and returns NULL.
	 * @note Child classes implementing this will usually return the argument of the last SetBuffer() call.
	 * @note SetBuffer() should be called before using GetBuffer().
	 */
	const char* GetBuffer() { return NULL; }

	/**
	 * Gives the object a place to write to. Should be called before using Write().
	 * @param buffer pointer to a char buffer that the object can write to
	 */
	void SetBuffer(char* buffer) {}

	/**
	 * Tells the object that the constant range [first_index, last_index] (last_index included)
	 * is being used by the shader.
	 */
	void SetConstantsUsed(unsigned int first_index, unsigned int last_index) {}

	/**
	 * Returns a reference to an internally stored object of the uid_data type.
	 * @warning The default implementation stores no such object and returns a reference bound to NULL.
	 *          Never access the result without first guarding against that, e.g. by comparing its
	 *          address against NULL and falling back to a dummy structure (cf. the vertex/pixel
	 *          shader generators).
	 */
	template<typename uid_data>
	uid_data& GetUidData() { return *static_cast<uid_data*>(NULL); }
};
/**
 * Shader UID class used to uniquely identify the ShaderCode output written in the shader generator.
 * uid_data can be any struct of parameters that uniquely identify each shader code output.
 * Unless performance is not an issue, uid_data should be tightly packed to reduce memory footprint.
 * Shader generators will write to specific uid_data fields; ShaderUid methods will only read raw u32
 * values from a union which overlays the uid_data object.
 *
 * The raw view always covers the whole uid_data object: its size is rounded UP to the next multiple
 * of sizeof(u32), so trailing bytes of an oddly sized uid_data are zero-initialized and take part in
 * all comparisons instead of being silently ignored.
 */
template<class uid_data>
class ShaderUid : public ShaderGeneratorInterface
{
public:
	ShaderUid()
	{
		// Zero everything (including padding) so that the raw comparisons below are well-defined.
		// TODO: Move to Shadergen => can be optimized out
		memset(values, 0, sizeof(values));
	}

	bool operator == (const ShaderUid& obj) const
	{
		return memcmp(this->values, obj.values, sizeof(values)) == 0;
	}

	bool operator != (const ShaderUid& obj) const
	{
		return !(*this == obj);
	}

	// determines the storage order inside STL containers
	// (lexicographic comparison of the raw u32 values)
	bool operator < (const ShaderUid& obj) const
	{
		// TODO: Store last frame used and order by that? makes much more sense anyway...
		for (unsigned int i = 0; i < NUM_VALUES; ++i)
		{
			if (this->values[i] < obj.values[i])
				return true;
			else if (this->values[i] > obj.values[i])
				return false;
		}
		// all values are equal
		return false;
	}

	// Write access for the shader generators; T is expected to be uid_data.
	template<class T>
	inline T& GetUidData() { return data; }

	// Read-only access to the stored uid data.
	const uid_data& GetUidData() const { return data; }

	// Size of the raw uid storage in BYTES (not in u32 values).
	size_t GetUidDataSize() const { return sizeof(values); }

private:
	// Number of u32 values needed to cover uid_data completely (rounded up, hence never zero).
	enum { NUM_VALUES = (sizeof(uid_data) + sizeof(u32) - 1) / sizeof(u32) };

	union
	{
		uid_data data;
		u32 values[NUM_VALUES];
	};
};
/**
 * Shader generator target which collects the generated shader source text.
 * The text is written into a caller-provided buffer (see SetBuffer); every Write() call appends
 * to whatever has been written since the last SetBuffer() call.
 * @warning The buffer size is not known to this class, so no bounds checking is performed.
 *          The caller has to provide a buffer which is large enough for the whole shader.
 */
class ShaderCode : public ShaderGeneratorInterface
{
public:
	ShaderCode() : buf(NULL), write_ptr(NULL)
	{
	}

	/**
	 * Appends a printf-style formatted string to the buffer.
	 * Does nothing if no buffer has been set via SetBuffer() yet.
	 */
	void Write(const char* fmt, ...)
	{
		// Nowhere to write to => writing through the NULL pointer would crash
		if (write_ptr == NULL)
			return;

		va_list arglist;
		va_start(arglist, fmt);
		const int num_written = vsprintf(write_ptr, fmt, arglist);
		va_end(arglist);

		// vsprintf returns a negative value on failure; never move the write position backwards
		if (num_written > 0)
			write_ptr += num_written;
	}

	// Returns the start of the buffer passed to the last SetBuffer() call (NULL if there was none).
	const char* GetBuffer() { return buf; }

	// Sets the output buffer and resets the write position to its beginning.
	void SetBuffer(char* buffer) { buf = buffer; write_ptr = buffer; }

private:
	const char* buf;  // start of the output buffer
	char* write_ptr;  // position at which the next Write() call continues
};
/**
 * Generates a shader constant profile which can be used to query which constants are used in a shader.
 * The shader generator reports used constant ranges via SetConstantsUsed().
 */
class ShaderConstantProfile : public ShaderGeneratorInterface
{
public:
	/**
	 * @param num_constants number of constants to keep track of; non-positive values track nothing
	 */
	ShaderConstantProfile(int num_constants)
	{
		// A negative count would be converted to a huge unsigned size by resize()
		constant_usage.resize(num_constants > 0 ? num_constants : 0);
	}

	/**
	 * Marks the constants first_index..last_index (last_index included) as used.
	 * Indices beyond the number of tracked constants are ignored instead of written out of bounds.
	 */
	inline void SetConstantsUsed(unsigned int first_index, unsigned int last_index)
	{
		const size_t num_constants = constant_usage.size();
		if (first_index >= num_constants)
			return; // range lies completely outside of the tracked constants (or nothing is tracked)

		// Clamp the inclusive end of the range. Comparing with "<=" against the clamped value also
		// avoids the wrap-around of "last_index+1" for last_index == UINT_MAX.
		const size_t last = (last_index < num_constants) ? last_index : num_constants - 1;
		for (size_t i = first_index; i <= last; ++i)
			constant_usage[i] = true;
	}

	/**
	 * Returns whether the constant with the given index is used by the shader.
	 * @note Currently reports every constant as used, regardless of what has been recorded.
	 */
	inline bool ConstantIsUsed(unsigned int index)
	{
		// TODO: Not ready for usage yet
		return true;
		// return constant_usage[index];
	}

private:
	std::vector<bool> constant_usage; // TODO: Is vector<bool> appropriate here?
};
/**
 * Writes an explicit register binding of the form " : register(<prefix><num>)" to the given
 * shader generator object. Nothing is written for API_OPENGL.
 */
template<class T>
static void WriteRegister(T& object, API_TYPE ApiType, const char *prefix, const u32 num)
{
	const bool needs_register_binding = (ApiType != API_OPENGL);
	if (needs_register_binding)
		object.Write(" : register(%s%d)", prefix, num);
}
/**
 * Writes the "uniform " storage qualifier to the given shader generator object,
 * unless uniform buffer objects are in use (in which case nothing is written).
 * @note ApiType is currently not evaluated.
 */
template<class T>
static void WriteLocation(T& object, API_TYPE ApiType, bool using_ubos)
{
	if (!using_ubos)
		object.Write("uniform ");
}
/**
 * Writes a complete uniform declaration line to the given shader generator object:
 *   [uniform ]<type> <name> [ : register(c<num>)];
 * The "uniform " qualifier and the register binding are emitted by WriteLocation and WriteRegister,
 * depending on using_ubos and api_type respectively.
 * @param num  index of the constant register to bind the uniform to
 * @param type type of the uniform, e.g. "float4"
 * @param name name of the uniform; may include an array suffix
 */
template<class T>
static void DeclareUniform(T& object, API_TYPE api_type, bool using_ubos, const u32 num, const char* type, const char* name)
{
	// Uniforms are always bound to constant ("c") registers
	const char* const register_prefix = "c";

	// storage qualifier
	WriteLocation(object, api_type, using_ubos);

	// type and name
	object.Write("%s %s ", type, name);

	// register binding and end of the declaration
	WriteRegister(object, api_type, register_prefix, num);
	object.Write(";\n");
}
#pragma pack(4)
/**
 * Common uid data used for shader generators that use lighting calculations.
 * Expected to be stored as a member called "lighting" of the generator's uid data structure.
 *
 * The first five fields add up to 28 bits; light_mask needs a full 32 bits of its own.
 * The "AxB bits" notes give the width of each field as A entries of B bits each
 * (presumably one entry per lighting channel - TODO confirm against the lighting shader generator).
 */
struct LightingUidData
{
u32 matsource : 4; // 4x1 bit
u32 enablelighting : 4; // 4x1 bit
u32 ambsource : 4; // 4x1 bit
u32 diffusefunc : 8; // 4x2 bits
u32 attnfunc : 8; // 4x2 bits
u32 light_mask : 32; // 4x8 bits
};
#pragma pack()
/**
* Checks if there has been
*/
template<class UidT, class CodeT>
class UidChecker
{
public:
void Invalidate()
{
m_shaders.clear();
m_uids.clear();
}
void AddToIndexAndCheck(CodeT& new_code, const UidT& new_uid, const char* shader_type, const char* dump_prefix)
{
bool uid_is_indexed = std::find(m_uids.begin(), m_uids.end(), new_uid) != m_uids.end();
if (!uid_is_indexed)
{
m_uids.push_back(new_uid);
m_shaders[new_uid] = new_code.GetBuffer();
}
else
{
// uid is already in the index => check if there's a shader with the same uid but different code
auto& old_code = m_shaders[new_uid];
if (strcmp(old_code.c_str(), new_code.GetBuffer()) != 0)
{
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%s%ssuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(),
dump_prefix,
++num_failures);
// TODO: Should also dump uids
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << "Old shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code.GetBuffer();
file << "\n\nShader uid:\n";
for (unsigned int i = 0; i < new_uid.GetUidDataSize(); ++i)
{
u32 value = ((u32*)&new_uid.GetUidData())[i];
if ((i % 4) == 0)
{
unsigned int last_value = (i+3 < new_uid.GetUidDataSize()-1) ? i+3 : new_uid.GetUidDataSize();
file << std::setfill(' ') << std::dec;
file << "Values " << std::setw(2) << i << " - " << last_value << ": ";
}
file << std::setw(8) << std::setfill('0') << std::hex << value << std::setw(1);
if ((i % 4) < 3)
file << ' ';
else
file << std::endl;
}
file.close();
ERROR_LOG(VIDEO, "%s shader uid mismatch! See %s for details", shader_type, szTemp);
}
}
}
private:
std::map<UidT,std::string> m_shaders;
std::vector<UidT> m_uids;
};
#endif // _SHADERGENCOMMON_H

View File

@ -17,235 +17,134 @@
#include "VertexShaderGen.h"
#include "VideoConfig.h"
// Mash together all the inputs that contribute to the code of a generated vertex shader into
// a unique identifier, basically containing all the bits. Yup, it's a lot ....
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components)
static char text[16768];
template<class T>
static void DefineVSOutputStructMember(T& object, API_TYPE api_type, const char* type, const char* name, int var_index, const char* semantic, int semantic_index = -1)
{
memset(uid->values, 0, sizeof(uid->values));
uid->values[0] = components |
(xfregs.numTexGen.numTexGens << 23) |
(xfregs.numChan.numColorChans << 27) |
(xfregs.dualTexTrans.enabled << 29);
object.Write(" %s %s", type, name);
if (var_index != -1)
object.Write("%d", var_index);
// TODO: If pixel lighting is enabled, do we even have to bother about storing lighting related registers here?
GetLightingShaderId(&uid->values[1]);
uid->values[2] |= (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting) << 31;
u32 *pcurvalue = &uid->values[3];
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
if (api_type == API_OPENGL)
object.Write(";\n");
else
{
TexMtxInfo tinfo = xfregs.texMtxInfo[i];
if (tinfo.texgentype != XF_TEXGEN_EMBOSS_MAP)
tinfo.hex &= 0x7ff;
if (tinfo.texgentype != XF_TEXGEN_REGULAR)
tinfo.projection = 0;
u32 val = ((tinfo.hex >> 1) & 0x1ffff);
if (xfregs.dualTexTrans.enabled && tinfo.texgentype == XF_TEXGEN_REGULAR)
{
// rewrite normalization and post index
val |= ((u32)xfregs.postMtxInfo[i].index << 17) | ((u32)xfregs.postMtxInfo[i].normalize << 23);
}
switch (i & 3)
{
case 0: pcurvalue[0] |= val; break;
case 1: pcurvalue[0] |= val << 24; pcurvalue[1] = val >> 8; ++pcurvalue; break;
case 2: pcurvalue[0] |= val << 16; pcurvalue[1] = val >> 16; ++pcurvalue; break;
case 3: pcurvalue[0] |= val << 8; ++pcurvalue; break;
}
if (semantic_index != -1)
object.Write(" : %s%d;\n", semantic, semantic_index);
else
object.Write(" : %s;\n", semantic);
}
}
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components)
template<class T>
static void GenerateVSOutputStruct(T& object, u32 components, API_TYPE api_type)
{
// Just store all used registers here without caring whether we need all bits or less.
memset(uid->values, 0, sizeof(uid->values));
u32* ptr = uid->values;
*ptr++ = components;
*ptr++ = xfregs.numTexGen.hex;
*ptr++ = xfregs.numChan.hex;
*ptr++ = xfregs.dualTexTrans.hex;
for (int i = 0; i < 2; ++i)
{
*ptr++ = xfregs.color[i].hex;
*ptr++ = xfregs.alpha[i].hex;
}
*ptr++ = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
for (unsigned int i = 0; i < 8; ++i)
{
*ptr++ = xfregs.texMtxInfo[i].hex;
*ptr++ = xfregs.postMtxInfo[i].hex;
}
_assert_((ptr - uid->values) == uid->GetNumValues());
}
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components)
{
if (!g_ActiveConfig.bEnableShaderDebugging)
return;
VERTEXSHADERUIDSAFE new_id;
GetSafeVertexShaderId(&new_id, components);
if (!(old_id == new_id))
{
std::string new_code(GenerateVertexShaderCode(components, api));
if (old_code != new_code)
{
_assert_(old_id.GetNumValues() == new_id.GetNumValues());
char msg[8192];
char* ptr = msg;
ptr += sprintf(ptr, "Vertex shader IDs matched but unique IDs did not!\nUnique IDs (old <-> new):\n");
const int N = new_id.GetNumValues();
for (int i = 0; i < N/2; ++i)
ptr += sprintf(ptr, "%02d, %08X %08X | %08X %08X\n", 2*i, old_id.values[2*i], old_id.values[2*i+1],
new_id.values[2*i], new_id.values[2*i+1]);
if (N % 2)
ptr += sprintf(ptr, "%02d, %08X | %08X\n", N-1, old_id.values[N-1], new_id.values[N-1]);
static int num_failures = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%svsuid_mismatch_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), num_failures++);
std::ofstream file;
OpenFStream(file, szTemp, std::ios_base::out);
file << msg;
file << "\n\nOld shader code:\n" << old_code;
file << "\n\nNew shader code:\n" << new_code;
file.close();
PanicAlert("Unique pixel shader ID mismatch!\n\nReport this to the devs, along with the contents of %s.", szTemp);
}
}
}
static char text[16384];
#define WRITE p+=sprintf
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE ApiType)
{
// "centroid" attribute is only supported by D3D11
const char* optCentroid = (ApiType == API_D3D11 ? "centroid" : "");
// GLSL makes this ugly
// TODO: Make pretty
WRITE(p, "struct VS_OUTPUT {\n");
WRITE(p, " %s float4 pos %s POSITION;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":");
WRITE(p, " %s float4 colors_0 %s COLOR0;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":");
WRITE(p, " %s float4 colors_1 %s COLOR1;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":");
object.Write("struct VS_OUTPUT {\n");
DefineVSOutputStructMember(object, api_type, "float4", "pos", -1, "POSITION");
DefineVSOutputStructMember(object, api_type, "float4", "colors_", 0, "COLOR", 0);
DefineVSOutputStructMember(object, api_type, "float4", "colors_", 1, "COLOR", 1);
if (xfregs.numTexGen.numTexGens < 7)
{
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, " %s float3 tex%d %s TEXCOORD%d;\n", optCentroid, i, ApiType == API_OPENGL ? ";//" : ":", i);
DefineVSOutputStructMember(object, api_type, "float3", "tex", i, "TEXCOORD", i);
WRITE(p, " %s float4 clipPos %s TEXCOORD%d;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":", xfregs.numTexGen.numTexGens);
DefineVSOutputStructMember(object, api_type, "float4", "clipPos", -1, "TEXCOORD", xfregs.numTexGen.numTexGens);
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, " %s float4 Normal %s TEXCOORD%d;\n", optCentroid, ApiType == API_OPENGL ? ";//" : ":", xfregs.numTexGen.numTexGens + 1);
DefineVSOutputStructMember(object, api_type, "float4", "Normal", -1, "TEXCOORD", xfregs.numTexGen.numTexGens + 1);
}
else
{
// clip position is in w of first 4 texcoords
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 8; ++i)
WRITE(p, " %s float4 tex%d %s TEXCOORD%d;\n", optCentroid, i, ApiType == API_OPENGL? ";//" : ":", i);
}
else
{
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, " %s float%d tex%d %s TEXCOORD%d;\n", optCentroid, i < 4 ? 4 : 3 , i, ApiType == API_OPENGL ? ";//" : ":", i);
}
// Store clip position in the w component of first 4 texcoords
bool ppl = g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting;
int num_texcoords = ppl ? 8 : xfregs.numTexGen.numTexGens;
for (int i = 0; i < num_texcoords; ++i)
DefineVSOutputStructMember(object, api_type, (ppl || i < 4) ? "float4" : "float3", "tex", i, "TEXCOORD", i);
}
WRITE(p, "};\n");
return p;
object.Write("};\n");
}
extern const char* WriteRegister(API_TYPE ApiType, const char *prefix, const u32 num);
extern const char *WriteLocation(API_TYPE ApiType);
const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
template<class T>
static void GenerateVertexShader(T& out, u32 components, API_TYPE api_type)
{
// Non-uid template parameters will write to the dummy data (=> gets optimized out)
vertex_shader_uid_data dummy_data;
vertex_shader_uid_data& uid_data = (&out.template GetUidData<vertex_shader_uid_data>() != NULL)
? out.template GetUidData<vertex_shader_uid_data>() : dummy_data;
out.SetBuffer(text);
#ifndef ANDROID
locale_t locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation
locale_t old_locale = uselocale(locale); // Apply the locale for this thread
locale_t locale;
locale_t old_locale;
if (out.GetBuffer() != NULL)
{
locale = newlocale(LC_NUMERIC_MASK, "C", NULL); // New locale for compilation
old_locale = uselocale(locale); // Apply the locale for this thread
}
#endif
text[sizeof(text) - 1] = 0x7C; // canary
_assert_(bpmem.genMode.numtexgens == xfregs.numTexGen.numTexGens);
_assert_(bpmem.genMode.numcolchans == xfregs.numChan.numColorChans);
bool is_d3d = (ApiType & API_D3D9 || ApiType == API_D3D11);
u32 lightMask = 0;
if (xfregs.numChan.numColorChans > 0)
lightMask |= xfregs.color[0].GetFullLightMask() | xfregs.alpha[0].GetFullLightMask();
if (xfregs.numChan.numColorChans > 1)
lightMask |= xfregs.color[1].GetFullLightMask() | xfregs.alpha[1].GetFullLightMask();
char *p = text;
WRITE(p, "//Vertex Shader: comp:%x, \n", components);
bool is_d3d = (api_type & API_D3D9 || api_type == API_D3D11);
// uniforms
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
WRITE(p, "layout(std140) uniform VSBlock {\n");
out.Write("layout(std140) uniform VSBlock {\n");
WRITE(p, "%sfloat4 " I_POSNORMALMATRIX"[6] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSNORMALMATRIX));
WRITE(p, "%sfloat4 " I_PROJECTION"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_PROJECTION));
WRITE(p, "%sfloat4 " I_MATERIALS"[4] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_MATERIALS));
WRITE(p, "%sfloat4 " I_LIGHTS"[40] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_LIGHTS));
WRITE(p, "%sfloat4 " I_TEXMATRICES"[24] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_TEXMATRICES)); // also using tex matrices
WRITE(p, "%sfloat4 " I_TRANSFORMMATRICES"[64] %s;\n", WriteLocation(ApiType),WriteRegister(ApiType, "c", C_TRANSFORMMATRICES));
WRITE(p, "%sfloat4 " I_NORMALMATRICES"[32] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_NORMALMATRICES));
WRITE(p, "%sfloat4 " I_POSTTRANSFORMMATRICES"[64] %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_POSTTRANSFORMMATRICES));
WRITE(p, "%sfloat4 " I_DEPTHPARAMS" %s;\n", WriteLocation(ApiType), WriteRegister(ApiType, "c", C_DEPTHPARAMS));
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSNORMALMATRIX, "float4", I_POSNORMALMATRIX"[6]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_PROJECTION, "float4", I_PROJECTION"[4]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_MATERIALS, "float4", I_MATERIALS"[4]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_LIGHTS, "float4", I_LIGHTS"[40]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TEXMATRICES, "float4", I_TEXMATRICES"[24]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_TRANSFORMMATRICES, "float4", I_TRANSFORMMATRICES"[64]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_NORMALMATRICES, "float4", I_NORMALMATRICES"[32]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_POSTTRANSFORMMATRICES, "float4", I_POSTTRANSFORMMATRICES"[64]");
DeclareUniform(out, api_type, g_ActiveConfig.backend_info.bSupportsGLSLUBO, C_DEPTHPARAMS, "float4", I_DEPTHPARAMS);
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
WRITE(p, "};\n");
out.Write("};\n");
p = GenerateVSOutputStruct(p, components, ApiType);
GenerateVSOutputStruct(out, components, api_type);
if(ApiType == API_OPENGL)
uid_data.numTexGens = xfregs.numTexGen.numTexGens;
uid_data.components = components;
if(api_type == API_OPENGL)
{
WRITE(p, "ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
out.Write("ATTRIN float4 rawpos; // ATTR%d,\n", SHADER_POSITION_ATTRIB);
if (components & VB_HAS_POSMTXIDX)
WRITE(p, "ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
out.Write("ATTRIN float fposmtx; // ATTR%d,\n", SHADER_POSMTX_ATTRIB);
if (components & VB_HAS_NRM0)
WRITE(p, "ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
out.Write("ATTRIN float3 rawnorm0; // ATTR%d,\n", SHADER_NORM0_ATTRIB);
if (components & VB_HAS_NRM1)
WRITE(p, "ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
out.Write("ATTRIN float3 rawnorm1; // ATTR%d,\n", SHADER_NORM1_ATTRIB);
if (components & VB_HAS_NRM2)
WRITE(p, "ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
out.Write("ATTRIN float3 rawnorm2; // ATTR%d,\n", SHADER_NORM2_ATTRIB);
if (components & VB_HAS_COL0)
WRITE(p, "ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
out.Write("ATTRIN float4 color0; // ATTR%d,\n", SHADER_COLOR0_ATTRIB);
if (components & VB_HAS_COL1)
WRITE(p, "ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
out.Write("ATTRIN float4 color1; // ATTR%d,\n", SHADER_COLOR1_ATTRIB);
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
WRITE(p, "ATTRIN float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
out.Write("ATTRIN float%d tex%d; // ATTR%d,\n", hastexmtx ? 3 : 2, i, SHADER_TEXTURE0_ATTRIB + i);
}
// Let's set up attributes
if (xfregs.numTexGen.numTexGens < 7)
{
for (int i = 0; i < 8; ++i)
WRITE(p, "VARYOUT float3 uv%d_2;\n", i);
WRITE(p, "VARYOUT float4 clipPos_2;\n");
out.Write("VARYOUT float3 uv%d_2;\n", i);
out.Write("VARYOUT float4 clipPos_2;\n");
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, "VARYOUT float4 Normal_2;\n");
out.Write("VARYOUT float4 Normal_2;\n");
}
else
{
@ -253,142 +152,140 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 8; ++i)
WRITE(p, "VARYOUT float4 uv%d_2;\n", i);
out.Write("VARYOUT float4 uv%d_2;\n", i);
}
else
{
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, "VARYOUT float%d uv%d_2;\n", i < 4 ? 4 : 3 , i);
out.Write("VARYOUT float%d uv%d_2;\n", i < 4 ? 4 : 3 , i);
}
}
WRITE(p, "VARYOUT float4 colors_02;\n");
WRITE(p, "VARYOUT float4 colors_12;\n");
out.Write("VARYOUT float4 colors_02;\n");
out.Write("VARYOUT float4 colors_12;\n");
WRITE(p, "void main()\n{\n");
out.Write("void main()\n{\n");
}
else
{
WRITE(p, "VS_OUTPUT main(\n");
out.Write("VS_OUTPUT main(\n");
// inputs
if (components & VB_HAS_NRM0)
WRITE(p, " float3 rawnorm0 : NORMAL0,\n");
out.Write(" float3 rawnorm0 : NORMAL0,\n");
if (components & VB_HAS_NRM1)
{
if (is_d3d)
WRITE(p, " float3 rawnorm1 : NORMAL1,\n");
out.Write(" float3 rawnorm1 : NORMAL1,\n");
else
WRITE(p, " float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB);
out.Write(" float3 rawnorm1 : ATTR%d,\n", SHADER_NORM1_ATTRIB);
}
if (components & VB_HAS_NRM2)
{
if (is_d3d)
WRITE(p, " float3 rawnorm2 : NORMAL2,\n");
out.Write(" float3 rawnorm2 : NORMAL2,\n");
else
WRITE(p, " float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB);
out.Write(" float3 rawnorm2 : ATTR%d,\n", SHADER_NORM2_ATTRIB);
}
if (components & VB_HAS_COL0)
{
WRITE(p, " float4 color0 : COLOR0,\n");
}
out.Write(" float4 color0 : COLOR0,\n");
if (components & VB_HAS_COL1)
{
WRITE(p, " float4 color1 : COLOR1,\n");
}
out.Write(" float4 color1 : COLOR1,\n");
for (int i = 0; i < 8; ++i)
{
u32 hastexmtx = (components & (VB_HAS_TEXMTXIDX0<<i));
if ((components & (VB_HAS_UV0<<i)) || hastexmtx)
WRITE(p, " float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
out.Write(" float%d tex%d : TEXCOORD%d,\n", hastexmtx ? 3 : 2, i, i);
}
if (components & VB_HAS_POSMTXIDX)
{
if (is_d3d)
WRITE(p, " float4 blend_indices : BLENDINDICES,\n");
out.Write(" float4 blend_indices : BLENDINDICES,\n");
else
WRITE(p, " float fposmtx : ATTR%d,\n", SHADER_POSMTX_ATTRIB);
out.Write(" float fposmtx : ATTR%d,\n", SHADER_POSMTX_ATTRIB);
}
WRITE(p, " float4 rawpos : POSITION) {\n");
out.Write(" float4 rawpos : POSITION) {\n");
}
WRITE(p, "VS_OUTPUT o;\n");
out.Write("VS_OUTPUT o;\n");
// transforms
if (components & VB_HAS_POSMTXIDX)
{
if (ApiType & API_D3D9)
if (api_type & API_D3D9)
{
WRITE(p, "int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n");
WRITE(p, "int posmtx = indices.x;\n");
out.Write("int4 indices = D3DCOLORtoUBYTE4(blend_indices);\n");
out.Write("int posmtx = indices.x;\n");
}
else if (ApiType == API_D3D11)
else if (api_type == API_D3D11)
{
WRITE(p, "int posmtx = blend_indices.x * 255.0f;\n");
out.Write("int posmtx = blend_indices.x * 255.0f;\n");
}
else
{
WRITE(p, "int posmtx = int(fposmtx);\n");
out.Write("int posmtx = int(fposmtx);\n");
}
if (DriverDetails::HasBug(DriverDetails::BUG_NODYNUBOACCESS))
{
// This'll cause issues, but it can't be helped
WRITE(p, "float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL)
WRITE(p, "float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n");
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[0], rawpos), dot(" I_TRANSFORMMATRICES"[1], rawpos), dot(" I_TRANSFORMMATRICES"[2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL)
out.Write("float3 N0 = " I_NORMALMATRICES"[0].xyz, N1 = " I_NORMALMATRICES"[1].xyz, N2 = " I_NORMALMATRICES"[2].xyz;\n");
}
else
{
WRITE(p, "float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");
out.Write("float4 pos = float4(dot(" I_TRANSFORMMATRICES"[posmtx], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+1], rawpos), dot(" I_TRANSFORMMATRICES"[posmtx+2], rawpos), 1);\n");
if (components & VB_HAS_NRMALL) {
WRITE(p, "int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
WRITE(p, "float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
out.Write("int normidx = posmtx >= 32 ? (posmtx-32) : posmtx;\n");
out.Write("float3 N0 = " I_NORMALMATRICES"[normidx].xyz, N1 = " I_NORMALMATRICES"[normidx+1].xyz, N2 = " I_NORMALMATRICES"[normidx+2].xyz;\n");
}
}
if (components & VB_HAS_NRM0)
WRITE(p, "float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
out.Write("float3 _norm0 = normalize(float3(dot(N0, rawnorm0), dot(N1, rawnorm0), dot(N2, rawnorm0)));\n");
if (components & VB_HAS_NRM1)
WRITE(p, "float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
out.Write("float3 _norm1 = float3(dot(N0, rawnorm1), dot(N1, rawnorm1), dot(N2, rawnorm1));\n");
if (components & VB_HAS_NRM2)
WRITE(p, "float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
out.Write("float3 _norm2 = float3(dot(N0, rawnorm2), dot(N1, rawnorm2), dot(N2, rawnorm2));\n");
}
else
{
WRITE(p, "float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n");
out.Write("float4 pos = float4(dot(" I_POSNORMALMATRIX"[0], rawpos), dot(" I_POSNORMALMATRIX"[1], rawpos), dot(" I_POSNORMALMATRIX"[2], rawpos), 1.0f);\n");
if (components & VB_HAS_NRM0)
WRITE(p, "float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
out.Write("float3 _norm0 = normalize(float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm0), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm0)));\n");
if (components & VB_HAS_NRM1)
WRITE(p, "float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n");
out.Write("float3 _norm1 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm1), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm1));\n");
if (components & VB_HAS_NRM2)
WRITE(p, "float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n");
out.Write("float3 _norm2 = float3(dot(" I_POSNORMALMATRIX"[3].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[4].xyz, rawnorm2), dot(" I_POSNORMALMATRIX"[5].xyz, rawnorm2));\n");
}
if (!(components & VB_HAS_NRM0))
WRITE(p, "float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n");
out.Write("float3 _norm0 = float3(0.0f, 0.0f, 0.0f);\n");
WRITE(p, "o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
WRITE(p, "float4 mat, lacc;\n"
"float3 ldir, h;\n"
"float dist, dist2, attn;\n");
out.Write("o.pos = float4(dot(" I_PROJECTION"[0], pos), dot(" I_PROJECTION"[1], pos), dot(" I_PROJECTION"[2], pos), dot(" I_PROJECTION"[3], pos));\n");
out.Write("float4 mat, lacc;\n"
"float3 ldir, h;\n"
"float dist, dist2, attn;\n");
uid_data.numColorChans = xfregs.numChan.numColorChans;
if (xfregs.numChan.numColorChans == 0)
{
if (components & VB_HAS_COL0)
WRITE(p, "o.colors_0 = color0;\n");
out.Write("o.colors_0 = color0;\n");
else
WRITE(p, "o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
out.Write("o.colors_0 = float4(1.0f, 1.0f, 1.0f, 1.0f);\n");
}
// TODO: This probably isn't necessary if pixel lighting is enabled.
p = GenerateLightingShader(p, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
GenerateLightingShader<T>(out, uid_data.lighting, components, I_MATERIALS, I_LIGHTS, "color", "o.colors_");
if (xfregs.numChan.numColorChans < 2)
{
if (components & VB_HAS_COL1)
WRITE(p, "o.colors_1 = color1;\n");
out.Write("o.colors_1 = color1;\n");
else
WRITE(p, "o.colors_1 = o.colors_0;\n");
out.Write("o.colors_1 = o.colors_0;\n");
}
// special case if only pos and tex coord 0 and tex coord input is AB11
// donko - this has caused problems in some games. removed for now.
@ -400,25 +297,25 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
*/
// transform texcoords
WRITE(p, "float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
out.Write("float4 coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
{
TexMtxInfo& texinfo = xfregs.texMtxInfo[i];
WRITE(p, "{\n");
WRITE(p, "coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
out.Write("{\n");
out.Write("coord = float4(0.0f, 0.0f, 1.0f, 1.0f);\n");
uid_data.texMtxInfo[i].sourcerow = xfregs.texMtxInfo[i].sourcerow;
switch (texinfo.sourcerow)
{
case XF_SRCGEOM_INROW:
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = rawpos;\n"); // pos.w is 1
out.Write("coord = rawpos;\n"); // pos.w is 1
break;
case XF_SRCNORMAL_INROW:
if (components & VB_HAS_NRM0)
{
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = float4(rawnorm0.xyz, 1.0f);\n");
out.Write("coord = float4(rawnorm0.xyz, 1.0f);\n");
}
break;
case XF_SRCCOLORS_INROW:
@ -428,24 +325,25 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (components & VB_HAS_NRM1)
{
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = float4(rawnorm1.xyz, 1.0f);\n");
out.Write("coord = float4(rawnorm1.xyz, 1.0f);\n");
}
break;
case XF_SRCBINORMAL_B_INROW:
if (components & VB_HAS_NRM2)
{
_assert_( texinfo.inputform == XF_TEXINPUT_ABC1 );
WRITE(p, "coord = float4(rawnorm2.xyz, 1.0f);\n");
out.Write("coord = float4(rawnorm2.xyz, 1.0f);\n");
}
break;
default:
_assert_(texinfo.sourcerow <= XF_SRCTEX7_INROW);
if (components & (VB_HAS_UV0<<(texinfo.sourcerow - XF_SRCTEX0_INROW)) )
WRITE(p, "coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
out.Write("coord = float4(tex%d.x, tex%d.y, 1.0f, 1.0f);\n", texinfo.sourcerow - XF_SRCTEX0_INROW, texinfo.sourcerow - XF_SRCTEX0_INROW);
break;
}
// first transformation
uid_data.texMtxInfo[i].texgentype = xfregs.texMtxInfo[i].texgentype;
switch (texinfo.texgentype)
{
case XF_TEXGEN_EMBOSS_MAP: // calculate tex coords into bump map
@ -453,51 +351,57 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (components & (VB_HAS_NRM1|VB_HAS_NRM2))
{
// transform the light dir into tangent space
WRITE(p, "ldir = normalize(" I_LIGHTS"[5*%d + 3].xyz - pos.xyz);\n", texinfo.embosslightshift);
WRITE(p, "o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift);
uid_data.texMtxInfo[i].embosslightshift = xfregs.texMtxInfo[i].embosslightshift;
uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift;
out.Write("ldir = normalize(%s.xyz - pos.xyz);\n", LightPos(I_LIGHTS, texinfo.embosslightshift));
out.Write("o.tex%d.xyz = o.tex%d.xyz + float3(dot(ldir, _norm1), dot(ldir, _norm2), 0.0f);\n", i, texinfo.embosssourceshift);
}
else
{
_assert_(0); // should have normals
WRITE(p, "o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
uid_data.texMtxInfo[i].embosssourceshift = xfregs.texMtxInfo[i].embosssourceshift;
out.Write("o.tex%d.xyz = o.tex%d.xyz;\n", i, texinfo.embosssourceshift);
}
break;
case XF_TEXGEN_COLOR_STRGBC0:
_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
WRITE(p, "o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
out.Write("o.tex%d.xyz = float3(o.colors_0.x, o.colors_0.y, 1);\n", i);
break;
case XF_TEXGEN_COLOR_STRGBC1:
_assert_(texinfo.sourcerow == XF_SRCCOLORS_INROW);
WRITE(p, "o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
out.Write("o.tex%d.xyz = float3(o.colors_1.x, o.colors_1.y, 1);\n", i);
break;
case XF_TEXGEN_REGULAR:
default:
uid_data.texMtxInfo[i].projection = xfregs.texMtxInfo[i].projection;
if (components & (VB_HAS_TEXMTXIDX0<<i))
{
WRITE(p, "int tmp = int(tex%d.z);\n", i);
out.Write("int tmp = int(tex%d.z);\n", i);
if (texinfo.projection == XF_TEXPROJ_STQ)
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), dot(coord, " I_TRANSFORMMATRICES"[tmp+2]));\n", i);
else
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TRANSFORMMATRICES"[tmp]), dot(coord, " I_TRANSFORMMATRICES"[tmp+1]), 1);\n", i);
}
else
{
if (texinfo.projection == XF_TEXPROJ_STQ)
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2);
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]));\n", i, 3*i, 3*i+1, 3*i+2);
else
WRITE(p, "o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1);
out.Write("o.tex%d.xyz = float3(dot(coord, " I_TEXMATRICES"[%d]), dot(coord, " I_TEXMATRICES"[%d]), 1);\n", i, 3*i, 3*i+1);
}
break;
}
uid_data.dualTexTrans.enabled = xfregs.dualTexTrans.enabled;
// CHECKME: does this only work for regular tex gen types?
if (xfregs.dualTexTrans.enabled && texinfo.texgentype == XF_TEXGEN_REGULAR)
{
const PostMtxInfo& postInfo = xfregs.postMtxInfo[i];
uid_data.postMtxInfo[i].index = xfregs.postMtxInfo[i].index;
int postidx = postInfo.index;
WRITE(p, "float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
out.Write("float4 P0 = " I_POSTTRANSFORMMATRICES"[%d];\n"
"float4 P1 = " I_POSTTRANSFORMMATRICES"[%d];\n"
"float4 P2 = " I_POSTTRANSFORMMATRICES"[%d];\n",
postidx&0x3f, (postidx+1)&0x3f, (postidx+2)&0x3f);
@ -509,76 +413,77 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
// q of output is unknown
// multiply by postmatrix
WRITE(p, "o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0f);\n", i, i, i);
out.Write("o.tex%d.xyz = float3(dot(P0.xy, o.tex%d.xy) + P0.z + P0.w, dot(P1.xy, o.tex%d.xy) + P1.z + P1.w, 0.0f);\n", i, i, i);
}
else
{
uid_data.postMtxInfo[i].normalize = xfregs.postMtxInfo[i].normalize;
if (postInfo.normalize)
WRITE(p, "o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
out.Write("o.tex%d.xyz = normalize(o.tex%d.xyz);\n", i, i);
// multiply by postmatrix
WRITE(p, "o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
out.Write("o.tex%d.xyz = float3(dot(P0.xyz, o.tex%d.xyz) + P0.w, dot(P1.xyz, o.tex%d.xyz) + P1.w, dot(P2.xyz, o.tex%d.xyz) + P2.w);\n", i, i, i, i);
}
}
WRITE(p, "}\n");
out.Write("}\n");
}
// clipPos/w needs to be done in pixel shader, not here
if (xfregs.numTexGen.numTexGens < 7)
{
WRITE(p, "o.clipPos = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n");
out.Write("o.clipPos = float4(pos.x,pos.y,o.pos.z,o.pos.w);\n");
}
else
{
WRITE(p, "o.tex0.w = pos.x;\n");
WRITE(p, "o.tex1.w = pos.y;\n");
WRITE(p, "o.tex2.w = o.pos.z;\n");
WRITE(p, "o.tex3.w = o.pos.w;\n");
out.Write("o.tex0.w = pos.x;\n");
out.Write("o.tex1.w = pos.y;\n");
out.Write("o.tex2.w = o.pos.z;\n");
out.Write("o.tex3.w = o.pos.w;\n");
}
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
if (xfregs.numTexGen.numTexGens < 7)
{
WRITE(p, "o.Normal = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n");
out.Write("o.Normal = float4(_norm0.x,_norm0.y,_norm0.z,pos.z);\n");
}
else
{
WRITE(p, "o.tex4.w = _norm0.x;\n");
WRITE(p, "o.tex5.w = _norm0.y;\n");
WRITE(p, "o.tex6.w = _norm0.z;\n");
out.Write("o.tex4.w = _norm0.x;\n");
out.Write("o.tex5.w = _norm0.y;\n");
out.Write("o.tex6.w = _norm0.z;\n");
if (xfregs.numTexGen.numTexGens < 8)
WRITE(p, "o.tex7 = pos.xyzz;\n");
out.Write("o.tex7 = pos.xyzz;\n");
else
WRITE(p, "o.tex7.w = pos.z;\n");
out.Write("o.tex7.w = pos.z;\n");
}
if (components & VB_HAS_COL0)
WRITE(p, "o.colors_0 = color0;\n");
out.Write("o.colors_0 = color0;\n");
if (components & VB_HAS_COL1)
WRITE(p, "o.colors_1 = color1;\n");
out.Write("o.colors_1 = color1;\n");
}
//write the true depth value, if the game uses depth textures pixel shaders will override with the correct values
//if not early z culling will improve speed
if (is_d3d)
{
WRITE(p, "o.pos.z = " I_DEPTHPARAMS".x * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y;\n");
out.Write("o.pos.z = " I_DEPTHPARAMS".x * o.pos.w + o.pos.z * " I_DEPTHPARAMS".y;\n");
}
else
{
// this results in a scale from -1..0 to -1..1 after perspective
// divide
WRITE(p, "o.pos.z = o.pos.w + o.pos.z * 2.0f;\n");
out.Write("o.pos.z = o.pos.w + o.pos.z * 2.0f;\n");
// Sonic Unleashed puts its final rendering at the near or
// far plane of the viewing frustrum(actually box, they use
// orthogonal projection for that), and we end up putting it
// just beyond, and the rendering gets clipped away. (The
// primitive gets dropped)
WRITE(p, "o.pos.z = o.pos.z * 1048575.0f/1048576.0f;\n");
out.Write("o.pos.z = o.pos.z * 1048575.0f/1048576.0f;\n");
// the next steps of the OGL pipeline are:
// (x_c,y_c,z_c,w_c) = o.pos //switch to OGL spec terminology
@ -591,14 +496,14 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
//seems to get rather complicated
}
if (ApiType & API_D3D9)
if (api_type & API_D3D9)
{
// D3D9 is addressing pixel centers instead of pixel boundaries in clip space.
// Thus we need to offset the final position by half a pixel
WRITE(p, "o.pos = o.pos + float4(" I_DEPTHPARAMS".z, " I_DEPTHPARAMS".w, 0.f, 0.f);\n");
out.Write("o.pos = o.pos + float4(" I_DEPTHPARAMS".z, " I_DEPTHPARAMS".w, 0.f, 0.f);\n");
}
if(ApiType == API_OPENGL)
if(api_type == API_OPENGL)
{
// Bit ugly here
// TODO: Make pretty
@ -610,15 +515,13 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
for (unsigned int i = 0; i < 8; ++i)
{
if(i < xfregs.numTexGen.numTexGens)
WRITE(p, " uv%d_2.xyz = o.tex%d;\n", i, i);
out.Write(" uv%d_2.xyz = o.tex%d;\n", i, i);
else
WRITE(p, " uv%d_2.xyz = float3(0.0f, 0.0f, 0.0f);\n", i);
out.Write(" uv%d_2.xyz = float3(0.0f, 0.0f, 0.0f);\n", i);
}
WRITE(p, " clipPos_2 = o.clipPos;\n");
out.Write(" clipPos_2 = o.clipPos;\n");
if(g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
WRITE(p, " Normal_2 = o.Normal;\n");
out.Write(" Normal_2 = o.Normal;\n");
}
else
{
@ -626,29 +529,47 @@ const char *GenerateVertexShaderCode(u32 components, API_TYPE ApiType)
if (g_ActiveConfig.bEnablePixelLighting && g_ActiveConfig.backend_info.bSupportsPixelLighting)
{
for (int i = 0; i < 8; ++i)
WRITE(p, " uv%d_2 = o.tex%d;\n", i, i);
out.Write(" uv%d_2 = o.tex%d;\n", i, i);
}
else
{
for (unsigned int i = 0; i < xfregs.numTexGen.numTexGens; ++i)
WRITE(p, " uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz" , i);
out.Write(" uv%d_2%s = o.tex%d;\n", i, i < 4 ? ".xyzw" : ".xyz" , i);
}
}
WRITE(p, "colors_02 = o.colors_0;\n");
WRITE(p, "colors_12 = o.colors_1;\n");
WRITE(p, "gl_Position = o.pos;\n");
WRITE(p, "}\n");
out.Write("colors_02 = o.colors_0;\n");
out.Write("colors_12 = o.colors_1;\n");
out.Write("gl_Position = o.pos;\n");
out.Write("}\n");
}
else
{
WRITE(p, "return o;\n}\n");
out.Write("return o;\n}\n");
}
if (text[sizeof(text) - 1] != 0x7C)
PanicAlert("VertexShader generator - buffer too small, canary has been eaten!");
#ifndef ANDROID
uselocale(old_locale); // restore locale
freelocale(locale);
if (out.GetBuffer() != NULL)
{
uselocale(old_locale); // restore locale
freelocale(locale);
}
#endif
return text;
}
// Fills `object` with the vertex shader UID for the given vertex components
// and API type by running the shared GenerateVertexShader template with
// VertexShaderUid as the output type.
// NOTE(review): presumably the UID instantiation only records the uid_data
// fields and produces no shader text - confirm against ShaderGenCommon.h.
void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type)
{
GenerateVertexShader<VertexShaderUid>(object, components, api_type);
}
// Generates the vertex shader source for the given vertex components and API
// type into `object` by running the shared GenerateVertexShader template with
// VertexShaderCode as the output type. Callers read the resulting text back
// through object.GetBuffer().
void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type)
{
GenerateVertexShader<VertexShaderCode>(object, components, api_type);
}
// Writes the vertex shader output struct declaration into `object` by
// forwarding to the GenerateVSOutputStruct template with ShaderCode as the
// output type. Intended for geometry shader generation, where the caller
// appends its own geometry shader source after this declaration.
// NOTE(review): callers appear to be expected to call object.SetBuffer()
// beforehand - confirm against ShaderGenCommon.h.
void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type)
{
GenerateVSOutputStruct<ShaderCode>(object, components, api_type);
}

View File

@ -5,8 +5,10 @@
#ifndef GCOGL_VERTEXSHADER_H
#define GCOGL_VERTEXSHADER_H
#include <stdarg.h>
#include "XFMemory.h"
#include "VideoCommon.h"
#include "ShaderGenCommon.h"
// TODO should be reordered
#define SHADER_POSITION_ATTRIB 0
@ -48,7 +50,8 @@
#define C_NORMALMATRICES (C_TRANSFORMMATRICES + 64)
#define C_POSTTRANSFORMMATRICES (C_NORMALMATRICES + 32)
#define C_DEPTHPARAMS (C_POSTTRANSFORMMATRICES + 64)
#define C_VENVCONST_END (C_DEPTHPARAMS + 1)
#define C_VENVCONST_END (C_DEPTHPARAMS + 1)
const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_PROJECTION , C_PROJECTION, 4 },
{I_MATERIALS, C_MATERIALS, 4 },
@ -59,75 +62,40 @@ const s_svar VSVar_Loc[] = { {I_POSNORMALMATRIX, C_POSNORMALMATRIX, 6 },
{I_POSTTRANSFORMMATRICES, C_POSTTRANSFORMMATRICES, 64 },
{I_DEPTHPARAMS, C_DEPTHPARAMS, 1 },
};
template<bool safe>
class _VERTEXSHADERUID
#pragma pack(4)
struct vertex_shader_uid_data
{
#define NUM_VSUID_VALUES_SAFE 25
public:
u32 values[safe ? NUM_VSUID_VALUES_SAFE : 9];
u32 components;
u32 numColorChans : 2;
u32 numTexGens : 4;
_VERTEXSHADERUID()
{
}
struct {
u32 projection : 1; // XF_TEXPROJ_X
u32 inputform : 2; // XF_TEXINPUT_X
u32 texgentype : 3; // XF_TEXGEN_X
u32 sourcerow : 5; // XF_SRCGEOM_X
u32 embosssourceshift : 3; // what generated texcoord to use
u32 embosslightshift : 3; // light index that is used
} texMtxInfo[8];
struct {
u32 index : 6; // base row of dual transform matrix
u32 normalize : 1; // normalize before send operation
} postMtxInfo[8];
struct {
u32 enabled : 1;
} dualTexTrans;
_VERTEXSHADERUID(const _VERTEXSHADERUID& r)
{
for (size_t i = 0; i < sizeof(values) / sizeof(u32); ++i)
values[i] = r.values[i];
}
int GetNumValues() const
{
if (safe) return NUM_VSUID_VALUES_SAFE;
else return (((values[0] >> 23) & 0xf) * 3 + 3) / 4 + 3; // numTexGens*3/4+1
}
bool operator <(const _VERTEXSHADERUID& _Right) const
{
if (values[0] < _Right.values[0])
return true;
else if (values[0] > _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
{
if (values[i] < _Right.values[i])
return true;
else if (values[i] > _Right.values[i])
return false;
}
return false;
}
bool operator ==(const _VERTEXSHADERUID& _Right) const
{
if (values[0] != _Right.values[0])
return false;
int N = GetNumValues();
for (int i = 1; i < N; ++i)
{
if (values[i] != _Right.values[i])
return false;
}
return true;
}
LightingUidData lighting;
};
typedef _VERTEXSHADERUID<false> VERTEXSHADERUID;
typedef _VERTEXSHADERUID<true> VERTEXSHADERUIDSAFE;
#pragma pack()
typedef ShaderUid<vertex_shader_uid_data> VertexShaderUid;
typedef ShaderCode VertexShaderCode; // TODO: Obsolete..
// components is included in the uid.
char* GenerateVSOutputStruct(char* p, u32 components, API_TYPE api_type);
const char *GenerateVertexShaderCode(u32 components, API_TYPE api_type);
void GetVertexShaderId(VERTEXSHADERUID *uid, u32 components);
void GetSafeVertexShaderId(VERTEXSHADERUIDSAFE *uid, u32 components);
// Used to make sure that our optimized vertex shader IDs don't lose any possible shader code changes
void ValidateVertexShaderIDs(API_TYPE api, VERTEXSHADERUIDSAFE old_id, const std::string& old_code, u32 components);
void GetVertexShaderUid(VertexShaderUid& object, u32 components, API_TYPE api_type);
void GenerateVertexShaderCode(VertexShaderCode& object, u32 components, API_TYPE api_type);
void GenerateVSOutputStructForGS(ShaderCode& object, u32 components, API_TYPE api_type);
#endif // GCOGL_VERTEXSHADER_H

View File

@ -252,6 +252,7 @@
<ClInclude Include="Src\PixelShaderGen.h" />
<ClInclude Include="Src\PixelShaderManager.h" />
<ClInclude Include="Src\RenderBase.h" />
<ClInclude Include="Src\ShaderGenCommon.h" />
<ClInclude Include="Src\Statistics.h" />
<ClInclude Include="Src\TextureCacheBase.h" />
<ClInclude Include="Src\TextureConversionShader.h" />

View File

@ -258,6 +258,9 @@
<ClInclude Include="Src\FPSCounter.h">
<Filter>Util</Filter>
</ClInclude>
<ClInclude Include="Src\ShaderGenCommon.h">
<Filter>Shader Generators</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<None Include="CMakeLists.txt" />
@ -291,4 +294,4 @@
<UniqueIdentifier>{e2a527a2-ccc8-4ab8-a93e-dd2628c0f3b6}</UniqueIdentifier>
</Filter>
</ItemGroup>
</Project>
</Project>

View File

@ -169,11 +169,12 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth,
if (shaderIt == m_shaders.end())
{
// Generate new shader. Warning: not thread-safe.
static char code[16384];
char* p = code;
p = GenerateVSOutputStruct(p, components, API_D3D11);
p += sprintf(p, "\n%s", LINE_GS_COMMON);
static char buffer[16384];
ShaderCode code;
code.SetBuffer(buffer);
GenerateVSOutputStructForGS(code, components, API_D3D11);
code.Write("\n%s", LINE_GS_COMMON);
std::stringstream numTexCoordsStream;
numTexCoordsStream << xfregs.numTexGen.numTexGens;
@ -185,7 +186,7 @@ bool LineGeometryShader::SetShader(u32 components, float lineWidth,
{ "NUM_TEXCOORDS", numTexCoordsStr.c_str() },
{ NULL, NULL }
};
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros);
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros);
if (!newShader)
{
WARN_LOG(VIDEO, "Line geometry shader for components 0x%.08X failed to compile", components);

View File

@ -28,9 +28,10 @@ namespace DX11
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry* PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;
LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
ID3D11PixelShader* s_ColorMatrixProgram[2] = {NULL};
ID3D11PixelShader* s_ColorCopyProgram[2] = {NULL};
@ -352,10 +353,10 @@ ID3D11Buffer* &PixelShaderCache::GetConstantBuffer()
}
// this class will load the precompiled shaders into our cache
class PixelShaderCacheInserter : public LinearDiskCacheReader<PIXELSHADERUID, u8>
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size)
void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
{
PixelShaderCache::InsertByteCode(key, value, value_size);
}
@ -414,7 +415,8 @@ void PixelShaderCache::Clear()
{
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
PixelShaders.clear();
pixel_uid_checker.Invalidate();
last_entry = NULL;
}
@ -450,8 +452,14 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode, components);
PixelShaderUid uid;
GetPixelShaderUid(uid, dstAlphaMode, API_D3D11, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components);
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}
// Check if the shader is already set
if (last_entry)
@ -459,7 +467,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return (last_entry->shader != NULL);
}
}
@ -475,15 +482,15 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE,true);
ValidatePixelShaderIDs(API_D3D11, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL);
}
// Need to compile a new shader
const char* code = GeneratePixelShaderCode(dstAlphaMode, API_D3D11, components);
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D11, components);
D3DBlob* pbytecode;
if (!D3D::CompilePixelShader(code, (unsigned int)strlen(code), &pbytecode))
if (!D3D::CompilePixelShader(code.GetBuffer(), (unsigned int)strlen(code.GetBuffer()), &pbytecode))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
@ -497,15 +504,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
PixelShaders[uid].code = code.GetBuffer();
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success;
}
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen)
bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen)
{
ID3D11PixelShader* shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);
if (shader == NULL)

View File

@ -22,7 +22,7 @@ public:
static void Clear();
static void Shutdown();
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 components); // TODO: Should be renamed to LoadShader
static bool InsertByteCode(const PIXELSHADERUID &uid, const void* bytecode, unsigned int bytecodelen);
static bool InsertByteCode(const PixelShaderUid &uid, const void* bytecode, unsigned int bytecodelen);
static ID3D11PixelShader* GetActiveShader() { return last_entry->shader; }
static ID3D11Buffer* &GetConstantBuffer();
@ -41,18 +41,19 @@ private:
{
ID3D11PixelShader* shader;
PIXELSHADERUIDSAFE safe_uid;
std::string code;
PSCacheEntry() : shader(NULL) {}
void Destroy() { SAFE_RELEASE(shader); }
};
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
static PSCache PixelShaders;
static const PSCacheEntry* last_entry;
static PIXELSHADERUID last_uid;
static PixelShaderUid last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
};
} // namespace DX11

View File

@ -163,10 +163,11 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize,
if (shaderIt == m_shaders.end())
{
// Generate new shader. Warning: not thread-safe.
static char code[16384];
char* p = code;
p = GenerateVSOutputStruct(p, components, API_D3D11);
p += sprintf(p, "\n%s", POINT_GS_COMMON);
static char buffer[16384];
ShaderCode code;
code.SetBuffer(buffer);
GenerateVSOutputStructForGS(code, components, API_D3D11);
code.Write("\n%s", POINT_GS_COMMON);
std::stringstream numTexCoordsStream;
numTexCoordsStream << xfregs.numTexGen.numTexGens;
@ -179,7 +180,7 @@ bool PointGeometryShader::SetShader(u32 components, float pointSize,
{ "NUM_TEXCOORDS", numTexCoordsStr.c_str() },
{ NULL, NULL }
};
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code, unsigned int(strlen(code)), macros);
ID3D11GeometryShader* newShader = D3D::CompileAndCreateGeometryShader(code.GetBuffer(), unsigned int(strlen(code.GetBuffer())), macros);
if (!newShader)
{
WARN_LOG(VIDEO, "Point geometry shader for components 0x%.08X failed to compile", components);

View File

@ -238,7 +238,7 @@ void VertexManager::vFlush()
// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;

View File

@ -24,14 +24,15 @@ namespace DX11 {
VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;
static ID3D11VertexShader* SimpleVertexShader = NULL;
static ID3D11VertexShader* ClearVertexShader = NULL;
static ID3D11InputLayout* SimpleLayout = NULL;
static ID3D11InputLayout* ClearLayout = NULL;
LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache;
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
ID3D11VertexShader* VertexShaderCache::GetSimpleVertexShader() { return SimpleVertexShader; }
ID3D11VertexShader* VertexShaderCache::GetClearVertexShader() { return ClearVertexShader; }
@ -57,10 +58,10 @@ ID3D11Buffer* &VertexShaderCache::GetConstantBuffer()
}
// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8>
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{
public:
void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size)
void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{
D3DBlob* blob = new D3DBlob(value_size, value);
VertexShaderCache::InsertByteCode(key, blob);
@ -176,6 +177,7 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy();
vshaders.clear();
vertex_uid_checker.Invalidate();
last_entry = NULL;
}
@ -197,14 +199,20 @@ void VertexShaderCache::Shutdown()
bool VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
VertexShaderUid uid;
GetVertexShaderUid(uid, components, API_D3D11);
if (g_ActiveConfig.bEnableShaderDebugging)
{
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D11);
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}
if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
}
@ -218,14 +226,14 @@ bool VertexShaderCache::SetShader(u32 components)
last_entry = &entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D11, entry.safe_uid, entry.code, components);
return (entry.shader != NULL);
}
const char *code = GenerateVertexShaderCode(components, API_D3D11);
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D11);
D3DBlob* pbytecode = NULL;
D3D::CompileVertexShader(code, (int)strlen(code), &pbytecode);
D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &pbytecode);
if (pbytecode == NULL)
{
@ -239,15 +247,14 @@ bool VertexShaderCache::SetShader(u32 components)
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
vshaders[uid].code = code.GetBuffer();
}
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success;
}
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob)
bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob)
{
ID3D11VertexShader* shader = D3D::CreateVertexShaderFromByteCode(bcodeblob);
if (shader == NULL)

View File

@ -31,7 +31,7 @@ public:
static ID3D11InputLayout* GetSimpleInputLayout();
static ID3D11InputLayout* GetClearInputLayout();
static bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, D3DBlob* bcodeblob);
static bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, D3DBlob* bcodeblob);
private:
struct VSCacheEntry
@ -39,7 +39,6 @@ private:
ID3D11VertexShader* shader;
D3DBlob* bytecode; // needed to initialize the input layout
VERTEXSHADERUIDSAFE safe_uid;
std::string code;
VSCacheEntry() : shader(NULL), bytecode(NULL) {}
@ -55,11 +54,13 @@ private:
SAFE_RELEASE(bytecode);
}
};
typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache;
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
static VSCache vshaders;
static const VSCacheEntry* last_entry;
static VERTEXSHADERUID last_uid;
static VertexShaderUid last_uid;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
};
} // namespace DX11

View File

@ -31,9 +31,10 @@ namespace DX9
PixelShaderCache::PSCache PixelShaderCache::PixelShaders;
const PixelShaderCache::PSCacheEntry *PixelShaderCache::last_entry;
PIXELSHADERUID PixelShaderCache::last_uid;
PixelShaderUid PixelShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> PixelShaderCache::pixel_uid_checker;
static LinearDiskCache<PIXELSHADERUID, u8> g_ps_disk_cache;
static LinearDiskCache<PixelShaderUid, u8> g_ps_disk_cache;
static std::set<u32> unique_shaders;
#define MAX_SSAA_SHADERS 3
@ -55,10 +56,10 @@ static LPDIRECT3DPIXELSHADER9 s_ClearProgram = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgba6_to_rgb8 = NULL;
static LPDIRECT3DPIXELSHADER9 s_rgb8_to_rgba6 = NULL;
class PixelShaderCacheInserter : public LinearDiskCacheReader<PIXELSHADERUID, u8>
class PixelShaderCacheInserter : public LinearDiskCacheReader<PixelShaderUid, u8>
{
public:
void Read(const PIXELSHADERUID &key, const u8 *value, u32 value_size)
void Read(const PixelShaderUid &key, const u8 *value, u32 value_size)
{
PixelShaderCache::InsertByteCode(key, value, value_size, false);
}
@ -287,6 +288,7 @@ void PixelShaderCache::Clear()
for (PSCache::iterator iter = PixelShaders.begin(); iter != PixelShaders.end(); iter++)
iter->second.Destroy();
PixelShaders.clear();
pixel_uid_checker.Invalidate();
last_entry = NULL;
}
@ -323,8 +325,14 @@ void PixelShaderCache::Shutdown()
bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
{
const API_TYPE api = ((D3D::GetCaps().PixelShaderVersion >> 8) & 0xFF) < 3 ? API_D3D9_SM20 : API_D3D9_SM30;
PIXELSHADERUID uid;
GetPixelShaderId(&uid, dstAlphaMode, components);
PixelShaderUid uid;
GetPixelShaderUid(uid, dstAlphaMode, API_D3D9, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, API_D3D9, components);
pixel_uid_checker.AddToIndexAndCheck(code, uid, "Pixel", "p");
}
// Check if the shader is already set
if (last_entry)
@ -332,7 +340,6 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, last_entry->safe_uid, last_entry->code, dstAlphaMode, components);
return last_entry->shader != NULL;
}
}
@ -349,34 +356,34 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (entry.shader) D3D::SetPixelShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidatePixelShaderIDs(api, entry.safe_uid, entry.code, dstAlphaMode, components);
return (entry.shader != NULL);
}
// Need to compile a new shader
const char *code = GeneratePixelShaderCode(dstAlphaMode, api, components);
PixelShaderCode code;
GeneratePixelShaderCode(code, dstAlphaMode, api, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
u32 code_hash = HashAdler32((const u8 *)code, strlen(code));
u32 code_hash = HashAdler32((const u8 *)code.GetBuffer(), strlen(code.GetBuffer()));
unique_shaders.insert(code_hash);
SETSTAT(stats.numUniquePixelShaders, unique_shaders.size());
}
#if defined(_DEBUG) || defined(DEBUGFAST)
if (g_ActiveConfig.iLog & CONF_SAVESHADERS && code) {
if (g_ActiveConfig.iLog & CONF_SAVESHADERS) {
static int counter = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, code);
SaveData(szTemp, code.GetBuffer());
}
#endif
u8 *bytecode = 0;
int bytecodelen = 0;
if (!D3D::CompilePixelShader(code, (int)strlen(code), &bytecode, &bytecodelen)) {
if (!D3D::CompilePixelShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen)) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
}
@ -390,15 +397,14 @@ bool PixelShaderCache::SetShader(DSTALPHA_MODE dstAlphaMode, u32 components)
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
PixelShaders[uid].code = code;
GetSafePixelShaderId(&PixelShaders[uid].safe_uid, dstAlphaMode, components);
PixelShaders[uid].code = code.GetBuffer();
}
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
return success;
}
bool PixelShaderCache::InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate)
bool PixelShaderCache::InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate)
{
LPDIRECT3DPIXELSHADER9 shader = D3D::CreatePixelShaderFromByteCode(bytecode, bytecodelen);

View File

@ -28,7 +28,6 @@ private:
LPDIRECT3DPIXELSHADER9 shader;
bool owns_shader;
PIXELSHADERUIDSAFE safe_uid;
std::string code;
PSCacheEntry() : shader(NULL), owns_shader(true) {}
@ -40,18 +39,20 @@ private:
}
};
typedef std::map<PIXELSHADERUID, PSCacheEntry> PSCache;
typedef std::map<PixelShaderUid, PSCacheEntry> PSCache;
static PSCache PixelShaders;
static const PSCacheEntry *last_entry;
static PIXELSHADERUID last_uid;
static PixelShaderUid last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
static void Clear();
public:
static void Init();
static void Shutdown();
static bool SetShader(DSTALPHA_MODE dstAlphaMode, u32 componets);
static bool InsertByteCode(const PIXELSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
static bool InsertByteCode(const PixelShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate);
static LPDIRECT3DPIXELSHADER9 GetColorMatrixProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetColorCopyProgram(int SSAAMode);
static LPDIRECT3DPIXELSHADER9 GetDepthMatrixProgram(int SSAAMode, bool depthConversion);

View File

@ -348,7 +348,7 @@ void VertexManager::vFlush()
// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
u32 stride = g_nativeVertexFmt->GetVertexStride();
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass && bpmem.dstalpha.enable && bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PIXELFMT_RGBA6_Z24;

View File

@ -25,14 +25,15 @@ namespace DX9
VertexShaderCache::VSCache VertexShaderCache::vshaders;
const VertexShaderCache::VSCacheEntry *VertexShaderCache::last_entry;
VERTEXSHADERUID VertexShaderCache::last_uid;
VertexShaderUid VertexShaderCache::last_uid;
UidChecker<VertexShaderUid,VertexShaderCode> VertexShaderCache::vertex_uid_checker;
#define MAX_SSAA_SHADERS 3
static LPDIRECT3DVERTEXSHADER9 SimpleVertexShader[MAX_SSAA_SHADERS];
static LPDIRECT3DVERTEXSHADER9 ClearVertexShader;
LinearDiskCache<VERTEXSHADERUID, u8> g_vs_disk_cache;
LinearDiskCache<VertexShaderUid, u8> g_vs_disk_cache;
LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetSimpleVertexShader(int level)
{
@ -45,10 +46,10 @@ LPDIRECT3DVERTEXSHADER9 VertexShaderCache::GetClearVertexShader()
}
// this class will load the precompiled shaders into our cache
class VertexShaderCacheInserter : public LinearDiskCacheReader<VERTEXSHADERUID, u8>
class VertexShaderCacheInserter : public LinearDiskCacheReader<VertexShaderUid, u8>
{
public:
// Forwards one (key, value) record to VertexShaderCache::InsertByteCode, passing
// `value` and `value_size` as the bytecode pointer and its length. The last
// argument (InsertByteCode's `activate` parameter) is always false here.
// NOTE(review): presumably invoked once per stored entry while the disk cache is
// being read, so shaders get registered without being made current - confirm
// against LinearDiskCacheReader.
void Read(const VERTEXSHADERUID &key, const u8 *value, u32 value_size)
void Read(const VertexShaderUid &key, const u8 *value, u32 value_size)
{
VertexShaderCache::InsertByteCode(key, value, value_size, false);
}
@ -150,6 +151,7 @@ void VertexShaderCache::Clear()
for (VSCache::iterator iter = vshaders.begin(); iter != vshaders.end(); ++iter)
iter->second.Destroy();
vshaders.clear();
vertex_uid_checker.Invalidate();
last_entry = NULL;
}
@ -174,14 +176,20 @@ void VertexShaderCache::Shutdown()
bool VertexShaderCache::SetShader(u32 components)
{
VERTEXSHADERUID uid;
GetVertexShaderId(&uid, components);
VertexShaderUid uid;
GetVertexShaderUid(uid, components, API_D3D9);
if (g_ActiveConfig.bEnableShaderDebugging)
{
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D9);
vertex_uid_checker.AddToIndexAndCheck(code, uid, "Vertex", "v");
}
if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, last_entry->safe_uid, last_entry->code, components);
return (last_entry->shader != NULL);
}
}
@ -196,14 +204,15 @@ bool VertexShaderCache::SetShader(u32 components)
if (entry.shader) D3D::SetVertexShader(entry.shader);
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
ValidateVertexShaderIDs(API_D3D9, entry.safe_uid, entry.code, components);
return (entry.shader != NULL);
}
const char *code = GenerateVertexShaderCode(components, API_D3D9);
VertexShaderCode code;
GenerateVertexShaderCode(code, components, API_D3D9);
u8 *bytecode;
int bytecodelen;
if (!D3D::CompileVertexShader(code, (int)strlen(code), &bytecode, &bytecodelen))
if (!D3D::CompileVertexShader(code.GetBuffer(), (int)strlen(code.GetBuffer()), &bytecode, &bytecodelen))
{
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return false;
@ -213,15 +222,14 @@ bool VertexShaderCache::SetShader(u32 components)
bool success = InsertByteCode(uid, bytecode, bytecodelen, true);
if (g_ActiveConfig.bEnableShaderDebugging && success)
{
vshaders[uid].code = code;
GetSafeVertexShaderId(&vshaders[uid].safe_uid, components);
vshaders[uid].code = code.GetBuffer();
}
delete [] bytecode;
GFX_DEBUGGER_PAUSE_AT(NEXT_VERTEX_SHADER_CHANGE, true);
return success;
}
bool VertexShaderCache::InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate) {
bool VertexShaderCache::InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate) {
LPDIRECT3DVERTEXSHADER9 shader = D3D::CreateVertexShaderFromByteCode(bytecode, bytecodelen);
// Make an entry in the table

View File

@ -23,7 +23,6 @@ private:
LPDIRECT3DVERTEXSHADER9 shader;
std::string code;
VERTEXSHADERUIDSAFE safe_uid;
VSCacheEntry() : shader(NULL) {}
void Destroy()
@ -34,11 +33,14 @@ private:
}
};
typedef std::map<VERTEXSHADERUID, VSCacheEntry> VSCache;
typedef std::map<VertexShaderUid, VSCacheEntry> VSCache;
static VSCache vshaders;
static const VSCacheEntry *last_entry;
static VERTEXSHADERUID last_uid;
static VertexShaderUid last_uid;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
static void Clear();
public:
@ -47,7 +49,7 @@ public:
static bool SetShader(u32 components);
static LPDIRECT3DVERTEXSHADER9 GetSimpleVertexShader(int level);
static LPDIRECT3DVERTEXSHADER9 GetClearVertexShader();
static bool InsertByteCode(const VERTEXSHADERUID &uid, const u8 *bytecode, int bytecodelen, bool activate);
static bool InsertByteCode(const VertexShaderUid &uid, const u8 *bytecode, int bytecodelen, bool activate);
static std::string GetCurrentShaderCode();
};

View File

@ -31,6 +31,8 @@ static GLuint CurrentProgram = 0;
ProgramShaderCache::PCache ProgramShaderCache::pshaders;
ProgramShaderCache::PCacheEntry* ProgramShaderCache::last_entry;
SHADERUID ProgramShaderCache::last_uid;
UidChecker<PixelShaderUid,PixelShaderCode> ProgramShaderCache::pixel_uid_checker;
UidChecker<VertexShaderUid,VertexShaderCode> ProgramShaderCache::vertex_uid_checker;
static char s_glsl_header[1024] = "";
@ -186,21 +188,20 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
{
SHADERUID uid;
GetShaderId(&uid, dstAlphaMode, components);
// Check if the shader is already set
if (last_entry)
{
if (uid == last_uid)
{
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidateShaderIDs(last_entry, dstAlphaMode, components);
last_entry->shader.Bind();
return &last_entry->shader;
}
}
last_uid = uid;
// Check if shader is already in cache
PCache::iterator iter = pshaders.find(uid);
if (iter != pshaders.end())
@ -209,24 +210,24 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
last_entry = entry;
GFX_DEBUGGER_PAUSE_AT(NEXT_PIXEL_SHADER_CHANGE, true);
ValidateShaderIDs(entry, dstAlphaMode, components);
last_entry->shader.Bind();
return &last_entry->shader;
}
// Make an entry in the table
PCacheEntry& newentry = pshaders[uid];
last_entry = &newentry;
newentry.in_cache = 0;
const char *vcode = GenerateVertexShaderCode(components, API_OPENGL);
const char *pcode = GeneratePixelShaderCode(dstAlphaMode, API_OPENGL, components);
VertexShaderCode vcode;
PixelShaderCode pcode;
GenerateVertexShaderCode(vcode, components, API_OPENGL);
GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
if (g_ActiveConfig.bEnableShaderDebugging)
{
GetSafeShaderId(&newentry.safe_uid, dstAlphaMode, components);
newentry.shader.strvprog = vcode;
newentry.shader.strpprog = pcode;
newentry.shader.strvprog = vcode.GetBuffer();
newentry.shader.strpprog = pcode.GetBuffer();
}
#if defined(_DEBUG) || defined(DEBUGFAST)
@ -234,13 +235,13 @@ SHADER* ProgramShaderCache::SetShader ( DSTALPHA_MODE dstAlphaMode, u32 componen
static int counter = 0;
char szTemp[MAX_PATH];
sprintf(szTemp, "%svs_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, vcode);
SaveData(szTemp, vcode.GetBuffer());
sprintf(szTemp, "%sps_%04i.txt", File::GetUserPath(D_DUMP_IDX).c_str(), counter++);
SaveData(szTemp, pcode);
SaveData(szTemp, pcode.GetBuffer());
}
#endif
if (!CompileShader(newentry.shader, vcode, pcode)) {
if (!CompileShader(newentry.shader, vcode.GetBuffer(), pcode.GetBuffer())) {
GFX_DEBUGGER_PAUSE_AT(NEXT_ERROR, true);
return NULL;
}
@ -257,7 +258,7 @@ bool ProgramShaderCache::CompileShader ( SHADER& shader, const char* vcode, cons
{
GLuint vsid = CompileSingleShader(GL_VERTEX_SHADER, vcode);
GLuint psid = CompileSingleShader(GL_FRAGMENT_SHADER, pcode);
if(!vsid || !psid)
{
glDeleteShader(vsid);
@ -380,28 +381,23 @@ GLuint ProgramShaderCache::CompileSingleShader (GLuint type, const char* code )
return result;
}
// Builds the combined shader UID from `dstAlphaMode` and `components` by filling
// both halves of `uid`: uid->puid (pixel shader) and uid->vuid (vertex shader).
void ProgramShaderCache::GetShaderId ( SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components )
void ProgramShaderCache::GetShaderId(SHADERUID* uid, DSTALPHA_MODE dstAlphaMode, u32 components)
{
GetPixelShaderId(&uid->puid, dstAlphaMode, components);
GetVertexShaderId(&uid->vuid, components);
GetPixelShaderUid(uid->puid, dstAlphaMode, API_OPENGL, components);
GetVertexShaderUid(uid->vuid, components, API_OPENGL);
// Only when shader debugging is enabled: generate the full pixel and vertex
// shader code and hand each one, together with its UID, to the matching
// UidChecker.
// NOTE(review): presumably the checker reports different shaders that end up
// with the same UID - confirm against UidChecker::AddToIndexAndCheck.
if (g_ActiveConfig.bEnableShaderDebugging)
{
PixelShaderCode pcode;
GeneratePixelShaderCode(pcode, dstAlphaMode, API_OPENGL, components);
pixel_uid_checker.AddToIndexAndCheck(pcode, uid->puid, "Pixel", "p");
VertexShaderCode vcode;
GenerateVertexShaderCode(vcode, components, API_OPENGL);
vertex_uid_checker.AddToIndexAndCheck(vcode, uid->vuid, "Vertex", "v");
}
}
// Fills both halves of a SHADERUIDSAFE: uid->puid via GetSafePixelShaderId
// (using `dstAlphaMode` and `components`) and uid->vuid via
// GetSafeVertexShaderId (using `components`).
void ProgramShaderCache::GetSafeShaderId ( SHADERUIDSAFE* uid, DSTALPHA_MODE dstAlphaMode, u32 components )
{
GetSafePixelShaderId(&uid->puid, dstAlphaMode, components);
GetSafeVertexShaderId(&uid->vuid, components);
}
// Runs the vertex and pixel shader ID validators for API_OPENGL against a cache
// entry, passing the entry's stored safe UIDs (safe_uid.vuid / safe_uid.puid)
// and its stored program strings (shader.strvprog / shader.strpprog).
void ProgramShaderCache::ValidateShaderIDs ( PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components )
{
ValidateVertexShaderIDs(API_OPENGL, entry->safe_uid.vuid, entry->shader.strvprog, components);
ValidatePixelShaderIDs(API_OPENGL, entry->safe_uid.puid, entry->shader.strpprog, dstAlphaMode, components);
}
ProgramShaderCache::PCacheEntry ProgramShaderCache::GetShaderProgram(void)
{
return *last_entry;
@ -497,6 +493,9 @@ void ProgramShaderCache::Shutdown(void)
iter->second.Destroy();
pshaders.clear();
pixel_uid_checker.Invalidate();
vertex_uid_checker.Invalidate();
if (g_ActiveConfig.backend_info.bSupportsGLSLUBO)
{
delete s_buffer;

View File

@ -16,18 +16,17 @@
namespace OGL
{
template<bool safe>
class _SHADERUID
class SHADERUID
{
public:
_VERTEXSHADERUID<safe> vuid;
_PIXELSHADERUID<safe> puid;
VertexShaderUid vuid;
PixelShaderUid puid;
_SHADERUID() {}
SHADERUID() {}
_SHADERUID(const _SHADERUID& r) : vuid(r.vuid), puid(r.puid) {}
SHADERUID(const SHADERUID& r) : vuid(r.vuid), puid(r.puid) {}
bool operator <(const _SHADERUID& r) const
bool operator <(const SHADERUID& r) const
{
if(puid < r.puid) return true;
if(r.puid < puid) return false;
@ -35,13 +34,11 @@ public:
return false;
}
bool operator ==(const _SHADERUID& r) const
bool operator ==(const SHADERUID& r) const
{
return puid == r.puid && vuid == r.vuid;
}
};
typedef _SHADERUID<false> SHADERUID;
typedef _SHADERUID<true> SHADERUIDSAFE;
const int NUM_UNIFORMS = 19;
@ -72,7 +69,6 @@ public:
struct PCacheEntry
{
SHADER shader;
SHADERUIDSAFE safe_uid;
bool in_cache;
void Destroy()
@ -81,12 +77,12 @@ public:
}
};
typedef std::map<SHADERUID, PCacheEntry> PCache;
static PCacheEntry GetShaderProgram(void);
static GLuint GetCurrentProgram(void);
static SHADER* SetShader(DSTALPHA_MODE dstAlphaMode, u32 components);
static void GetShaderId(SHADERUID *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
static void GetSafeShaderId(SHADERUIDSAFE *uid, DSTALPHA_MODE dstAlphaMode, u32 components);
static void ValidateShaderIDs(PCacheEntry *entry, DSTALPHA_MODE dstAlphaMode, u32 components);
static bool CompileShader(SHADER &shader, const char* vcode, const char* pcode);
static GLuint CompileSingleShader(GLuint type, const char *code);
@ -106,12 +102,13 @@ private:
void Read(const SHADERUID &key, const u8 *value, u32 value_size);
};
typedef std::map<SHADERUID, PCacheEntry> PCache;
static PCache pshaders;
static PCacheEntry* last_entry;
static SHADERUID last_uid;
static UidChecker<PixelShaderUid,PixelShaderCode> pixel_uid_checker;
static UidChecker<VertexShaderUid,VertexShaderCode> vertex_uid_checker;
static GLintptr s_vs_data_size;
static GLintptr s_ps_data_size;
static GLintptr s_vs_data_offset;

View File

@ -257,7 +257,7 @@ void VertexManager::vFlush()
// set global constants
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
ProgramShaderCache::UploadConstants();
// set up the pointers
@ -278,7 +278,7 @@ void VertexManager::vFlush()
{
// Need to set these again, if we don't support UBO
VertexShaderManager::SetConstants();
PixelShaderManager::SetConstants();
PixelShaderManager::SetConstants(g_nativeVertexFmt->m_components);
}
// only update alpha