D3D: Use fixed constant registers for vertex shaders too.

This commit is contained in:
Henrik Rydgard 2014-09-10 13:22:05 +02:00
parent d2d563cd2c
commit bf7a4f9097
5 changed files with 135 additions and 77 deletions

View File

@ -229,61 +229,68 @@ void PSShader::SetColorUniform3Alpha255(int creg, u32 color, u8 alpha) {
pD3Ddevice->SetPixelShaderConstantF(creg, col, 1);
}
void VSShader::SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len) {
constant->SetFloatArray(pD3Ddevice, uniform, pArray, len);
// Writes value into the x component of vertex shader constant register creg.
// The y, z and w components of that register are set to zero.
void VSShader::SetFloat(int creg, float value) {
	float reg[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	reg[0] = value;
	pD3Ddevice->SetVertexShaderConstantF(creg, reg, 1);
}
void VSShader::SetFloat(D3DXHANDLE uniform, float value) {
constant->SetFloat(pD3Ddevice, uniform, value);
// Uploads up to four floats from value into the single vertex shader constant
// register creg. Components at index count and above are zero-filled.
//
// Exactly one register (four floats) is written, so count is clamped to 4.
// Without the clamp a larger count would write past the end of the local
// staging array. A count of zero or less uploads an all-zero register.
void VSShader::SetFloatArray(int creg, const float *value, int count) {
	float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	const int n = count < 4 ? count : 4;
	for (int i = 0; i < n; i++) {
		f[i] = value[i];
	}
	pD3Ddevice->SetVertexShaderConstantF(creg, f, 1);
}
// Utility
void VSShader::SetColorUniform3(D3DXHANDLE uniform, u32 color) {
// Unpacks the three low bytes of color (bits 0-7, 8-15, 16-23) into x, y, z
// as byte / 255.0f, sets w to 0.0f, and writes the resulting four floats to
// vertex shader constant register creg.
void VSShader::SetColorUniform3(int creg, u32 color) {
const float col[4] = {
((color & 0xFF)) / 255.0f,
((color & 0xFF00) >> 8) / 255.0f,
((color & 0xFF0000) >> 16) / 255.0f
((color & 0xFF0000) >> 16) / 255.0f,
0.0f
};
SetFloatArray(uniform, col, 4);
pD3Ddevice->SetVertexShaderConstantF(creg, col, 1);
}
void VSShader::SetFloat24Uniform3(D3DXHANDLE uniform, const u32 data[3]) {
// Builds a four-word vector from data[0..2], each shifted right by 8 bits,
// with a zero fourth word, and uploads those 32-bit words reinterpreted as
// floats to vertex shader constant register creg.
// NOTE(review): the shifted integers are passed to the device as raw float
// bit patterns; confirm that ">> 8" is the intended float24 expansion here.
void VSShader::SetFloat24Uniform3(int creg, const u32 data[3]) {
const u32 col[4] = {
data[0] >> 8, data[1] >> 8, data[2] >> 8,
data[0] >> 8, data[1] >> 8, data[2] >> 8, 0
};
SetFloatArray(uniform, (const float *)&col[0], 4);
pD3Ddevice->SetVertexShaderConstantF(creg, (const float *)&col[0], 1);
}
void VSShader::SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha) {
// Unpacks the three low bytes of color (bits 0-7, 8-15, 16-23) into x, y, z
// as byte / 255.0f, stores alpha / 255.0f in w, and writes the resulting four
// floats to vertex shader constant register creg.
void VSShader::SetColorUniform3Alpha(int creg, u32 color, u8 alpha) {
const float col[4] = {
((color & 0xFF)) / 255.0f,
((color & 0xFF00) >> 8) / 255.0f,
((color & 0xFF0000) >> 16) / 255.0f,
alpha/255.0f
};
SetFloatArray(uniform, col, 4);
pD3Ddevice->SetVertexShaderConstantF(creg, col, 1);
}
void VSShader::SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra) {
// Unpacks the three low bytes of color (bits 0-7, 8-15, 16-23) into x, y, z
// as byte / 255.0f, stores extra unchanged in w, and writes the resulting
// four floats to vertex shader constant register creg.
void VSShader::SetColorUniform3ExtraFloat(int creg, u32 color, float extra) {
const float col[4] = {
((color & 0xFF)) / 255.0f,
((color & 0xFF00) >> 8) / 255.0f,
((color & 0xFF0000) >> 16) / 255.0f,
extra
};
SetFloatArray(uniform, col, 4);
pD3Ddevice->SetVertexShaderConstantF(creg, col, 1);
}
// Utility
void VSShader::SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3) {
// Expands the 12-float 4x3 matrix at m4x3 into a transposed 4x4 matrix and
// uploads it to the four consecutive vertex shader constant registers that
// start at creg.
void VSShader::SetMatrix4x3(int creg, const float *m4x3) {
float m4x4[16];
ConvertMatrix4x3To4x4(m4x4, m4x3);
constant->SetMatrix(pD3Ddevice, uniform, (D3DXMATRIX*)m4x4);
ConvertMatrix4x3To4x4Transposed(m4x4, m4x3);
pD3Ddevice->SetVertexShaderConstantF(creg, m4x4, 4);
}
void VSShader::SetMatrix(D3DXHANDLE uniform, const float* pMatrix) {
D3DXMATRIX * pDxMat = (D3DXMATRIX*)pMatrix;
constant->SetMatrix(pD3Ddevice, uniform, pDxMat);
void VSShader::SetMatrix(int creg, const float* pMatrix) {
float transp[16];
Transpose4x4(transp, pMatrix);
pD3Ddevice->SetVertexShaderConstantF(creg, transp, 4);
}
// Depth in OpenGL is in the range [-1, 1]; we need [0, 1], optionally reversed.
@ -342,7 +349,7 @@ void VSShader::updateUniforms(int dirtyUniforms) {
bool invert = gstate_c.vpDepth < 0;
ConvertProjMatrixToD3D(flippedMatrix, invert);
SetMatrix(u_proj, flippedMatrix.getReadPtr());
SetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr());
}
if (u_proj_through != 0 && (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX)) {
Matrix4x4 proj_through;
@ -350,24 +357,24 @@ void VSShader::updateUniforms(int dirtyUniforms) {
ConvertProjMatrixToD3D(proj_through, false);
SetMatrix(u_proj_through, proj_through.getReadPtr());
SetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr());
}
// Transform
if (u_world != 0 && (dirtyUniforms & DIRTY_WORLDMATRIX)) {
SetMatrix4x3(u_world, gstate.worldMatrix);
SetMatrix4x3(CONST_VS_WORLD, gstate.worldMatrix);
}
if (u_view != 0 && (dirtyUniforms & DIRTY_VIEWMATRIX)) {
SetMatrix4x3(u_view, gstate.viewMatrix);
SetMatrix4x3(CONST_VS_VIEW, gstate.viewMatrix);
}
if (u_texmtx != 0 && (dirtyUniforms & DIRTY_TEXMATRIX)) {
SetMatrix4x3(u_texmtx, gstate.tgenMatrix);
SetMatrix4x3(CONST_VS_TEXMTX, gstate.tgenMatrix);
}
if (u_fogcoef != 0 && (dirtyUniforms & DIRTY_FOGCOEF)) {
const float fogcoef[2] = {
getFloat24(gstate.fog1),
getFloat24(gstate.fog2),
};
SetFloatArray(u_fogcoef, fogcoef, 2);
SetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
}
// TODO: Could even set all bones in one go if they're all dirty.
#ifdef USE_BONE_ARRAY
@ -399,9 +406,8 @@ void VSShader::updateUniforms(int dirtyUniforms) {
for (int i = 0; i < numBones; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(bonetemp, gstate.boneMatrix + 12 * i);
if (u_bone[i] != 0)
SetMatrix(u_bone[i], bonetemp);
SetMatrix(CONST_VS_BONE0 + 4 * i, bonetemp);
}
}
#endif
@ -434,24 +440,24 @@ void VSShader::updateUniforms(int dirtyUniforms) {
uvscaleoff[3] = 0.0f;
}
}
SetFloatArray(u_uvscaleoffset, uvscaleoff, 4);
SetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4);
}
// Lighting
if (u_ambient != 0 && (dirtyUniforms & DIRTY_AMBIENT)) {
SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA());
SetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA());
}
if (u_matambientalpha != 0 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) {
SetColorUniform3Alpha(u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA());
SetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA());
}
if (u_matdiffuse != 0 && (dirtyUniforms & DIRTY_MATDIFFUSE)) {
SetColorUniform3(u_matdiffuse, gstate.materialdiffuse);
SetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse);
}
if (u_matemissive != 0 && (dirtyUniforms & DIRTY_MATEMISSIVE)) {
SetColorUniform3(u_matemissive, gstate.materialemissive);
SetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive);
}
if (u_matspecular != 0 && (dirtyUniforms & DIRTY_MATSPECULAR)) {
SetColorUniform3ExtraFloat(u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
SetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
}
for (int i = 0; i < 4; i++) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
@ -467,18 +473,18 @@ void VSShader::updateUniforms(int dirtyUniforms) {
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
SetFloatArray(u_lightpos[i], vec, 3);
SetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3);
} else {
SetFloat24Uniform3(u_lightpos[i], &gstate.lpos[i * 3]);
SetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]);
}
}
if (u_lightdir[i] != 0) SetFloat24Uniform3(u_lightdir[i], &gstate.ldir[i * 3]);
if (u_lightatt[i] != 0) SetFloat24Uniform3(u_lightatt[i], &gstate.latt[i * 3]);
if (u_lightangle[i] != 0) SetFloat(u_lightangle[i], getFloat24(gstate.lcutoff[i]));
if (u_lightspotCoef[i] != 0) SetFloat(u_lightspotCoef[i], getFloat24(gstate.lconv[i]));
if (u_lightambient[i] != 0) SetColorUniform3(u_lightambient[i], gstate.lcolor[i * 3]);
if (u_lightdiffuse[i] != 0) SetColorUniform3(u_lightdiffuse[i], gstate.lcolor[i * 3 + 1]);
if (u_lightspecular[i] != 0) SetColorUniform3(u_lightspecular[i], gstate.lcolor[i * 3 + 2]);
if (u_lightdir[i] != 0) SetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]);
if (u_lightatt[i] != 0) SetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]);
if (u_lightangle[i] != 0) SetFloat(CONST_VS_LIGHTANGLE + i, getFloat24(gstate.lcutoff[i]));
if (u_lightspotCoef[i] != 0) SetFloat(CONST_VS_LIGHTSPOTCOEF + i, getFloat24(gstate.lconv[i]));
if (u_lightambient[i] != 0) SetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]);
if (u_lightdiffuse[i] != 0) SetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]);
if (u_lightspecular[i] != 0) SetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]);
}
}
}

View File

@ -154,14 +154,14 @@ public:
void updateUniforms(int dirtyUniforms);
void SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3);
void SetColorUniform3(D3DXHANDLE uniform, u32 color);
void SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra);
void SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha);
void SetMatrix(D3DXHANDLE uniform, const float* pMatrix);
void SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len);
void SetFloat(D3DXHANDLE uniform, float value);
void SetFloat24Uniform3(D3DXHANDLE uniform, const u32 data[3]);
void SetMatrix4x3(int creg, const float *m4x3);
void SetColorUniform3(int creg, u32 color);
void SetColorUniform3ExtraFloat(int creg, u32 color, float extra);
void SetColorUniform3Alpha(int creg, u32 color, u8 alpha);
void SetMatrix(int creg, const float* pMatrix);
void SetFloat(int creg, float value);
void SetFloatArray(int creg, const float *value, int count);
void SetFloat24Uniform3(int creg, const u32 data[3]);
D3DXHANDLE GetConstantByName(LPCSTR pName);
LPDIRECT3DVERTEXSHADER9 shader;

View File

@ -161,74 +161,72 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
WRITE(p, "#pragma warning( disable : 3571 )\n");
if (gstate.isModeThrough()) {
WRITE(p, "float4x4 u_proj_through;\n");
WRITE(p, "float4x4 u_proj_through : register(c%i);\n", CONST_VS_PROJ_THROUGH);
} else {
WRITE(p, "float4x4 u_proj;\n");
WRITE(p, "float4x4 u_proj : register(c%i);\n", CONST_VS_PROJ);
// Add all the uniforms we'll need to transform properly.
}
if (enableFog) {
WRITE(p, "float2 u_fogcoef;\n");
WRITE(p, "float2 u_fogcoef : register(c%i);\n", CONST_VS_FOGCOEF);
}
if (useHWTransform || !hasColor)
WRITE(p, "float4 u_matambientalpha;\n"); // matambient + matalpha
if (useHWTransform) {
// When transforming by hardware, we need a great deal more uniforms...
WRITE(p, "float4x4 u_world;\n");
WRITE(p, "float4x4 u_view;\n");
WRITE(p, "float4x4 u_world : register(c%i);\n", CONST_VS_WORLD);
WRITE(p, "float4x4 u_view : register(c%i);\n", CONST_VS_VIEW);
if (gstate.getUVGenMode() == 1)
WRITE(p, "float4x4 u_texmtx;\n");
WRITE(p, "float4x4 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX);
if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) {
int numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
#ifdef USE_BONE_ARRAY
WRITE(p, "float4x4 u_bone[%i];\n", numBones);
#else
for (int i = 0; i < numBones; i++) {
WRITE(p, "float4x4 u_bone%i;\n", i);
WRITE(p, "float4x4 u_bone%i : register(c%i);\n", i, CONST_VS_BONE0 + i * 4);
}
#endif
}
if (doTexture) {
WRITE(p, "float4 u_uvscaleoffset;\n");
WRITE(p, "float4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET);
}
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_OFF) {
// This is needed for shade mapping
WRITE(p, "float3 u_lightpos%i;\n", i);
WRITE(p, "float3 u_lightpos%i : register(c%i);\n", i, CONST_VS_LIGHTPOS + i);
}
if (doLight[i] == LIGHT_FULL) {
GELightType type = gstate.getLightType(i);
if (type != GE_LIGHTTYPE_DIRECTIONAL)
WRITE(p, "float3 u_lightatt%i;\n", i);
WRITE(p, "float3 u_lightatt%i : register(c%i);\n", i, CONST_VS_LIGHTATT + i);
if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) {
WRITE(p, "float3 u_lightdir%i;\n", i);
WRITE(p, "float u_lightangle%i;\n", i);
WRITE(p, "float u_lightspotCoef%i;\n", i);
WRITE(p, "float3 u_lightdir%i : register(c%i);\n", i, CONST_VS_LIGHTDIR + i);
WRITE(p, "float u_lightangle%i : register(c%i);\n", i, CONST_VS_LIGHTANGLE + i);
WRITE(p, "float u_lightspotCoef%i : register(c%i);\n", i, CONST_VS_LIGHTSPOTCOEF + i);
}
WRITE(p, "float3 u_lightambient%i;\n", i);
WRITE(p, "float3 u_lightdiffuse%i;\n", i);
WRITE(p, "float3 u_lightambient%i : register(c%i);\n", i, CONST_VS_LIGHTAMBIENT + i);
WRITE(p, "float3 u_lightdiffuse%i : register(c%i);\n", i, CONST_VS_LIGHTDIFFUSE + i);
if (gstate.isUsingSpecularLight(i))
WRITE(p, "float3 u_lightspecular%i;\n", i);
WRITE(p, "float3 u_lightspecular%i : register(c%i);\n", i, CONST_VS_LIGHTSPECULAR + i);
}
}
if (gstate.isLightingEnabled()) {
WRITE(p, "float4 u_ambient;\n");
WRITE(p, "float4 u_ambient : register(c%i);\n", CONST_VS_AMBIENT);
if ((gstate.materialupdate & 2) == 0 || !hasColor)
WRITE(p, "float3 u_matdiffuse;\n");
WRITE(p, "float3 u_matdiffuse : register(c%i);\n", CONST_VS_MATDIFFUSE);
// if ((gstate.materialupdate & 4) == 0)
WRITE(p, "float4 u_matspecular;\n"); // Specular coef is contained in alpha
WRITE(p, "float3 u_matemissive;\n");
WRITE(p, "float4 u_matspecular : register(c%i);\n", CONST_VS_MATSPECULAR); // Specular coef is contained in alpha
WRITE(p, "float3 u_matemissive : register(c%i);\n", CONST_VS_MATEMISSIVE);
}
}
if (useHWTransform) {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
WRITE(p, " struct VS_IN { \n");
if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) {
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType))]);
}
@ -249,9 +247,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
WRITE(p, " \n");
} else {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
WRITE(p, " struct VS_IN { \n");
WRITE(p, " float4 position : POSITION; \n");
WRITE(p, " float3 texcoord : TEXCOORD0; \n");
WRITE(p, " float4 color0 : COLOR0; \n");

View File

@ -58,4 +58,34 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform);
// Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
int TranslateNumBonesDX9(int bones);
// Fixed vertex shader constant register (c#) assignments. The generated
// shader source binds each uniform with ": register(c<N>)" using these
// values, and the VSShader setters upload to the same indices.

// 4x4 matrices: each one is uploaded as four consecutive registers, hence
// the spacing of 4.
#define CONST_VS_PROJ 0
#define CONST_VS_PROJ_THROUGH 4
#define CONST_VS_VIEW 8
#define CONST_VS_WORLD 12
#define CONST_VS_TEXMTX 16
// Bone matrices, also four registers apart; callers address bone i as
// CONST_VS_BONE0 + 4 * i.
#define CONST_VS_BONE0 20
#define CONST_VS_BONE1 24
#define CONST_VS_BONE2 28
#define CONST_VS_BONE3 32
#define CONST_VS_BONE4 36
#define CONST_VS_BONE5 40
#define CONST_VS_BONE6 44
#define CONST_VS_BONE7 48
#define CONST_VS_BONE8 52
// Single-register values (one four-float register each).
#define CONST_VS_FOGCOEF 56
#define CONST_VS_UVSCALEOFFSET 57
#define CONST_VS_AMBIENT 58
#define CONST_VS_MATAMBIENTALPHA 59
#define CONST_VS_MATDIFFUSE 60
#define CONST_VS_MATSPECULAR 61
#define CONST_VS_MATEMISSIVE 62
// Per-light values: callers add the light index (0..3) to the base, so each
// group spans four registers.
#define CONST_VS_LIGHTPOS 64
#define CONST_VS_LIGHTDIR 68
#define CONST_VS_LIGHTATT 72
#define CONST_VS_LIGHTANGLE 76
#define CONST_VS_LIGHTSPOTCOEF 80
#define CONST_VS_LIGHTDIFFUSE 84
#define CONST_VS_LIGHTSPECULAR 88
#define CONST_VS_LIGHTAMBIENT 92
};

View File

@ -852,6 +852,32 @@ inline void ConvertMatrix4x3To4x4(float *m4x4, const float *m4x3) {
m4x4[15] = 1.0f;
}
// Expands a 4x3 matrix (12 floats, stored as four groups of three) into a
// 4x4 matrix (16 floats), transposing while copying: the first three output
// rows each gather one component from every input group, and the last output
// row is fixed to (0, 0, 0, 1).
inline void ConvertMatrix4x3To4x4Transposed(float *m4x4, const float *m4x3) {
	for (int row = 0; row < 3; row++) {
		for (int col = 0; col < 4; col++) {
			m4x4[row * 4 + col] = m4x3[col * 3 + row];
		}
	}
	m4x4[12] = 0.0f;
	m4x4[13] = 0.0f;
	m4x4[14] = 0.0f;
	m4x4[15] = 1.0f;
}
// Writes the transpose of the 4x4 matrix in (16 floats) to out, so that
// out[r * 4 + c] == in[c * 4 + r]. Elements of out are written in ascending
// index order.
inline void Transpose4x4(float out[16], const float in[16]) {
	for (int k = 0; k < 16; k++) {
		const int r = k / 4;
		const int c = k % 4;
		out[k] = in[c * 4 + r];
	}
}
inline float Vec3Dot(const float v1[3], const float v2[3])
{