Merge pull request #6873 from hrydgard/d3d-shader-cleanup

D3D shader cleanup - remove LinkedShaders and use fixed constant slots
This commit is contained in:
Henrik Rydgård 2014-09-11 19:59:55 +02:00
commit 7ffb4602e3
11 changed files with 386 additions and 571 deletions

View File

@ -493,9 +493,6 @@ void DIRECTX9_GPU::BeginFrameInternal() {
}
shaderManager_->DirtyShader();
// Not sure if this is really needed.
shaderManager_->DirtyUniform(DIRTY_ALL);
framebufferManager_.BeginFrame();
}
@ -1413,7 +1410,7 @@ void DIRECTX9_GPU::ExecuteOpInternal(u32 op, u32 diff) {
void DIRECTX9_GPU::UpdateStats() {
gpuStats.numVertexShaders = shaderManager_->NumVertexShaders();
gpuStats.numFragmentShaders = shaderManager_->NumFragmentShaders();
gpuStats.numShaders = shaderManager_->NumPrograms();
gpuStats.numShaders = -1;
gpuStats.numTextures = (int)textureCache_.NumLoadedTextures();
gpuStats.numFBOs = (int)framebufferManager_.NumVFBs();
}

View File

@ -219,15 +219,16 @@ void GenerateFragmentShaderDX9(char *buffer) {
WRITE(p, "sampler tex: register(s0);\n");
if (enableAlphaTest || enableColorTest) {
WRITE(p, "float4 u_alphacolorref;\n");
WRITE(p, "float4 u_alphacolormask;\n");
WRITE(p, "float4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF);
WRITE(p, "float4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK);
}
if (gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_BLEND) {
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
}
if (gstate.isTextureMapEnabled() && gstate.getTextureFunction() == GE_TEXFUNC_BLEND)
WRITE(p, "float3 u_texenv;\n");
if (enableFog) {
WRITE(p, "float3 u_fogcolor;\n");
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
}
if (enableAlphaTest) {
WRITE(p, "float roundAndScaleTo255f(float x) { return floor(x * 255.0f + 0.5f); }\n");
@ -236,26 +237,23 @@ void GenerateFragmentShaderDX9(char *buffer) {
WRITE(p, "float3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n");
}
WRITE(p, " struct PS_IN \n");
WRITE(p, " { \n");
if (doTexture)
{
WRITE(p, "struct PS_IN {\n");
if (doTexture) {
if (doTextureProjection)
WRITE(p, " float3 v_texcoord: TEXCOORD0; \n");
WRITE(p, " float3 v_texcoord: TEXCOORD0;\n");
else
WRITE(p, " float2 v_texcoord: TEXCOORD0; \n");
WRITE(p, " float2 v_texcoord: TEXCOORD0;\n");
}
WRITE(p, " float4 v_color0: COLOR0; \n");
WRITE(p, " float4 v_color0: COLOR0;\n");
if (lmode) {
WRITE(p, " float3 v_color1: COLOR1; \n");
WRITE(p, " float3 v_color1: COLOR1;\n");
}
if (enableFog) {
WRITE(p, "float2 v_fogdepth: TEXCOORD1;\n");
WRITE(p, " float2 v_fogdepth: TEXCOORD1;\n");
}
WRITE(p, " }; \n");
WRITE(p, " \n");
WRITE(p, " float4 main( PS_IN In ) : COLOR \n");
WRITE(p, " { \n");
WRITE(p, "};\n");
WRITE(p, "float4 main( PS_IN In ) : COLOR\n");
WRITE(p, "{\n");
if (gstate.isModeClear()) {
// Clear mode does not allow any fancy shading.

View File

@ -53,4 +53,9 @@ bool IsAlphaTestAgainstZero();
bool IsAlphaTestTriviallyTrue();
bool IsColorTestTriviallyTrue();
#define CONST_PS_TEXENV 0
#define CONST_PS_ALPHACOLORREF 1
#define CONST_PS_ALPHACOLORMASK 2
#define CONST_PS_FOGCOLOR 3
};

View File

@ -37,7 +37,7 @@
namespace DX9 {
PSShader::PSShader(const char *code, bool useHWTransform) : failed_(false), useHWTransform_(useHWTransform) {
PSShader::PSShader(const char *code, bool useHWTransform) : shader(nullptr), failed_(false), useHWTransform_(useHWTransform) {
source_ = code;
#ifdef SHADERLOG
OutputDebugString(ConvertUTF8ToWString(code).c_str());
@ -45,7 +45,7 @@ PSShader::PSShader(const char *code, bool useHWTransform) : failed_(false), useH
bool success;
std::string errorMessage;
success = CompilePixelShader(code, &shader, &constant, errorMessage);
success = CompilePixelShader(code, &shader, NULL, errorMessage);
if (!errorMessage.empty()) {
if (success) {
@ -65,6 +65,7 @@ PSShader::PSShader(const char *code, bool useHWTransform) : failed_(false), useH
if (shader)
shader->Release();
shader = NULL;
return;
} else {
DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code);
}
@ -72,13 +73,11 @@ PSShader::PSShader(const char *code, bool useHWTransform) : failed_(false), useH
PSShader::~PSShader() {
pD3Ddevice->SetPixelShader(NULL);
if (constant)
constant->Release();
if (shader)
shader->Release();
}
VSShader::VSShader(const char *code, bool useHWTransform) : failed_(false), useHWTransform_(useHWTransform) {
VSShader::VSShader(const char *code, int vertType, bool useHWTransform) : shader(nullptr), failed_(false), useHWTransform_(useHWTransform) {
source_ = code;
#ifdef SHADERLOG
OutputDebugString(ConvertUTF8ToWString(code).c_str());
@ -86,8 +85,7 @@ VSShader::VSShader(const char *code, bool useHWTransform) : failed_(false), useH
bool success;
std::string errorMessage;
success = CompileVertexShader(code, &shader, &constant, errorMessage);
success = CompileVertexShader(code, &shader, NULL, errorMessage);
if (!errorMessage.empty()) {
if (success) {
ERROR_LOG(G3D, "Warnings in shader compilation!");
@ -106,6 +104,7 @@ VSShader::VSShader(const char *code, bool useHWTransform) : failed_(false), useH
if (shader)
shader->Release();
shader = NULL;
return;
} else {
DEBUG_LOG(G3D, "Compiled shader:\n%s\n", (const char *)code);
}
@ -113,172 +112,93 @@ VSShader::VSShader(const char *code, bool useHWTransform) : failed_(false), useH
VSShader::~VSShader() {
pD3Ddevice->SetVertexShader(NULL);
if (constant)
constant->Release();
if (shader)
shader->Release();
}
// Helper
// Resolves a shader constant by name. The pixel shader's constant table is
// queried first; only when it yields NULL is the vertex shader's table tried.
// Returns NULL when neither table knows the name.
D3DXHANDLE LinkedShaderDX9::GetConstantByName(LPCSTR pName) {
	D3DXHANDLE handle = m_fs->constant->GetConstantByName(NULL, pName);
	if (handle == NULL) {
		handle = m_vs->constant->GetConstantByName(NULL, pName);
	}
	return handle;
}
// Pairs a compiled vertex shader with a compiled pixel shader.
//
// Looks up the handle of every uniform either shader may declare, binds both
// shaders on the device, flags every uniform dirty and then calls use().
//   vs, fs         - compiled shaders; the pointers are stored in m_vs / m_fs.
//   vertType       - PSP vertex type; only used to derive numBones.
//   useHWTransform - stored in useHWTransform_.
LinkedShaderDX9::LinkedShaderDX9(VSShader *vs, PSShader *fs, u32 vertType, bool useHWTransform)
	: dirtyUniforms(0), useHWTransform_(useHWTransform) {
	// Print the shader pointers with %p: casting a pointer to int truncates it
	// on 64-bit builds.
	INFO_LOG(G3D, "Linked shader: vs %p fs %p", (void *)vs->shader, (void *)fs->shader);

	m_vs = vs;
	m_fs = fs;

	u_tex = GetConstantByName("tex");
	u_proj = GetConstantByName("u_proj");
	u_proj_through = GetConstantByName("u_proj_through");
	u_texenv = GetConstantByName("u_texenv");
	u_fogcolor = GetConstantByName("u_fogcolor");
	u_fogcoef = GetConstantByName("u_fogcoef");
	u_alphacolorref = GetConstantByName("u_alphacolorref");
	u_alphacolormask = GetConstantByName("u_alphacolormask");

	// Transform
	u_view = GetConstantByName("u_view");
	u_world = GetConstantByName("u_world");
	u_texmtx = GetConstantByName("u_texmtx");

	if (vertTypeGetWeightMask(vertType) != 0)
		numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
	else
		numBones = 0;

#ifdef USE_BONE_ARRAY
	// Was a stray glGetUniformLocation(program, ...) call, which cannot compile
	// here; resolve the array through the constant tables like everything else.
	u_bone = GetConstantByName("u_bone");
#else
	for (int i = 0; i < 8; i++) {
		char name[10];  // "u_bone" + one digit + terminator fits comfortably.
		sprintf(name, "u_bone%i", i);
		u_bone[i] = GetConstantByName(name);
	}
#endif

	// Lighting, texturing
	u_ambient = GetConstantByName("u_ambient");
	u_matambientalpha = GetConstantByName("u_matambientalpha");
	u_matdiffuse = GetConstantByName("u_matdiffuse");
	u_matspecular = GetConstantByName("u_matspecular");
	u_matemissive = GetConstantByName("u_matemissive");
	u_uvscaleoffset = GetConstantByName("u_uvscaleoffset");

	// Per-light uniforms are named with the light index as a suffix.
	for (int i = 0; i < 4; i++) {
		char temp[64];
		sprintf(temp, "u_lightpos%i", i);
		u_lightpos[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightdir%i", i);
		u_lightdir[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightatt%i", i);
		u_lightatt[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightangle%i", i);
		u_lightangle[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightspotCoef%i", i);
		u_lightspotCoef[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightambient%i", i);
		u_lightambient[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightdiffuse%i", i);
		u_lightdiffuse[i] = GetConstantByName(temp);
		sprintf(temp, "u_lightspecular%i", i);
		u_lightspecular[i] = GetConstantByName(temp);
	}

	//glUseProgram(program);
	pD3Ddevice->SetPixelShader(fs->shader);
	pD3Ddevice->SetVertexShader(vs->shader);

	// Default uniform values
	//glUniform1i(u_tex, 0);

	// The rest, use the "dirty" mechanism.
	dirtyUniforms = DIRTY_ALL;
	use();
}
// Intentionally empty: nothing is released here, so the VSShader / PSShader
// objects pointed to by m_vs and m_fs are left untouched.
LinkedShaderDX9::~LinkedShaderDX9() {
// glDeleteProgram(program);
}
// Writes `len` floats to `uniform`. The pixel shader's constant table is tried
// first; if that call does not return D3D_OK the same write is issued against
// the vertex shader's constant table.
void LinkedShaderDX9::SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len) {
	if (m_fs->constant->SetFloatArray(pD3Ddevice, uniform, pArray, len) != D3D_OK) {
		m_vs->constant->SetFloatArray(pD3Ddevice, uniform, pArray, len);
	}
}
// Writes a single float to `uniform`. The pixel shader's constant table is
// tried first; if that call does not return D3D_OK the vertex shader's table
// receives the write instead.
void LinkedShaderDX9::SetFloat(D3DXHANDLE uniform, float value) {
	if (m_fs->constant->SetFloat(pD3Ddevice, uniform, value) != D3D_OK) {
		m_vs->constant->SetFloat(pD3Ddevice, uniform, value);
	}
}
// Utility
void LinkedShaderDX9::SetColorUniform3(D3DXHANDLE uniform, u32 color) {
void ShaderManagerDX9::PSSetColorUniform3(int creg, u32 color) {
const float col[4] = {
((color & 0xFF)) / 255.0f,
((color & 0xFF00) >> 8) / 255.0f,
((color & 0xFF0000) >> 16) / 255.0f
((color & 0xFF)) * (1.0f / 255.0f),
((color & 0xFF00) >> 8) * (1.0f / 255.0f),
((color & 0xFF0000) >> 16) * (1.0f / 255.0f),
0.0f
};
SetFloatArray(uniform, col, 4);
pD3Ddevice->SetPixelShaderConstantF(creg, col, 1);
}
// Uploads three 24-bit floats (each held in the low 24 bits of a u32) as the
// xyz of a four-float uniform; w is set to 0.
//
// A float24 is the top 24 bits of an IEEE-754 single, so it is widened by
// shifting LEFT by 8. The previous right shift produced garbage values.
// NOTE(review): this must decode the same way as getFloat24(), which the
// directional-light path applies to the very same registers - confirm.
void LinkedShaderDX9::SetFloat24Uniform3(D3DXHANDLE uniform, const u32 data[3]) {
	const u32 bits[4] = {
		data[0] << 8, data[1] << 8, data[2] << 8, 0
	};
	// Copy the bit patterns instead of casting the pointer, which would break
	// strict-aliasing rules.
	float values[4];
	memcpy(values, bits, sizeof(values));
	SetFloatArray(uniform, values, 4);
}
// Uploads a colour as four floats in the 0..1 range: the three low bytes of
// `color` become x, y, z (lowest byte first) and `alpha` becomes w.
void LinkedShaderDX9::SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha) {
	const float x = (color & 0xFF) / 255.0f;
	const float y = ((color >> 8) & 0xFF) / 255.0f;
	const float z = ((color >> 16) & 0xFF) / 255.0f;
	const float w = alpha / 255.0f;
	const float rgba[4] = { x, y, z, w };
	SetFloatArray(uniform, rgba, 4);
}
void LinkedShaderDX9::SetColorUniform3Alpha255(D3DXHANDLE uniform, u32 color, u8 alpha) {
void ShaderManagerDX9::PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha) {
const float col[4] = {
(float)((color & 0xFF)),
(float)((color & 0xFF00) >> 8),
(float)((color & 0xFF0000) >> 16),
(float)alpha,
};
SetFloatArray(uniform, col, 4);
pD3Ddevice->SetPixelShaderConstantF(creg, col, 1);
}
void LinkedShaderDX9::SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra) {
// Sets vertex shader constant register `creg` to (value, 0, 0, 0).
void ShaderManagerDX9::VSSetFloat(int creg, float value) {
	float reg[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	reg[0] = value;
	pD3Ddevice->SetVertexShaderConstantF(creg, reg, 1);
}
// Uploads up to four floats into the single vertex shader constant register
// `creg`; components not supplied are set to 0.
//   value - source floats, at least `count` of them.
//   count - number of floats to copy. Values above 4 are clamped, because one
//           register holds exactly four floats and the staging array below is
//           sized accordingly (an unclamped count would overrun the stack).
//           Zero or negative counts upload an all-zero register.
void ShaderManagerDX9::VSSetFloatArray(int creg, const float *value, int count) {
	float f[4] = { 0.0f, 0.0f, 0.0f, 0.0f };
	const int n = count < 4 ? count : 4;
	for (int i = 0; i < n; i++) {
		f[i] = value[i];
	}
	pD3Ddevice->SetVertexShaderConstantF(creg, f, 1);
}
// Utility
// Sets vertex shader constant register `creg` to the three low bytes of
// `color`, each scaled to 0..1 (lowest byte in x); w is 0.
void ShaderManagerDX9::VSSetColorUniform3(int creg, u32 color) {
	const float x = (color & 0xFF) / 255.0f;
	const float y = ((color >> 8) & 0xFF) / 255.0f;
	const float z = ((color >> 16) & 0xFF) / 255.0f;
	const float reg[4] = { x, y, z, 0.0f };
	pD3Ddevice->SetVertexShaderConstantF(creg, reg, 1);
}
void ShaderManagerDX9::VSSetFloat24Uniform3(int creg, const u32 data[3]) {
const u32 col[4] = {
data[0] >> 8, data[1] >> 8, data[2] >> 8, 0
};
pD3Ddevice->SetVertexShaderConstantF(creg, (const float *)&col[0], 1);
}
// Sets vertex shader constant register `creg` to the three low bytes of
// `color` (lowest byte in x) plus `alpha` in w, all scaled to 0..1.
void ShaderManagerDX9::VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha) {
	const float x = (color & 0xFF) / 255.0f;
	const float y = ((color >> 8) & 0xFF) / 255.0f;
	const float z = ((color >> 16) & 0xFF) / 255.0f;
	const float w = alpha / 255.0f;
	const float reg[4] = { x, y, z, w };
	pD3Ddevice->SetVertexShaderConstantF(creg, reg, 1);
}
void ShaderManagerDX9::VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra) {
const float col[4] = {
((color & 0xFF)) / 255.0f,
((color & 0xFF00) >> 8) / 255.0f,
((color & 0xFF0000) >> 16) / 255.0f,
extra
};
SetFloatArray(uniform, col, 4);
pD3Ddevice->SetVertexShaderConstantF(creg, col, 1);
}
// Utility
void LinkedShaderDX9::SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3) {
void ShaderManagerDX9::VSSetMatrix4x3(int creg, const float *m4x3) {
float m4x4[16];
ConvertMatrix4x3To4x4(m4x4, m4x3);
m_vs->constant->SetMatrix(pD3Ddevice, uniform, (D3DXMATRIX*)m4x4);
ConvertMatrix4x3To4x4Transposed(m4x4, m4x3);
pD3Ddevice->SetVertexShaderConstantF(creg, m4x4, 4);
}
void LinkedShaderDX9::SetMatrix(D3DXHANDLE uniform, const float* pMatrix) {
D3DXMATRIX * pDxMat = (D3DXMATRIX*)pMatrix;
m_vs->constant->SetMatrix(pD3Ddevice, uniform, pDxMat);
void ShaderManagerDX9::VSSetMatrix(int creg, const float* pMatrix) {
float transp[16];
Transpose4x4(transp, pMatrix);
pD3Ddevice->SetVertexShaderConstantF(creg, transp, 4);
}
// Depth in ogl is between -1;1 we need between 0;1 and optionally reverse it
@ -290,23 +210,24 @@ void ConvertProjMatrixToD3D(Matrix4x4 & in, bool invert) {
in = in * s * t;
}
void LinkedShaderDX9::use() {
updateUniforms();
pD3Ddevice->SetPixelShader(m_fs->shader);
pD3Ddevice->SetVertexShader(m_vs->shader);
// Re-uploads every pixel shader constant whose DIRTY_* bit is set in
// `dirtyUniforms`. Each constant goes to its fixed CONST_PS_* register.
void ShaderManagerDX9::PSUpdateUniforms(int dirtyUniforms) {
	const bool texEnvDirty = (dirtyUniforms & DIRTY_TEXENV) != 0;
	const bool alphaColorRefDirty = (dirtyUniforms & DIRTY_ALPHACOLORREF) != 0;
	const bool alphaColorMaskDirty = (dirtyUniforms & DIRTY_ALPHACOLORMASK) != 0;
	const bool fogColorDirty = (dirtyUniforms & DIRTY_FOGCOLOR) != 0;

	if (texEnvDirty)
		PSSetColorUniform3(CONST_PS_TEXENV, gstate.texenvcolor);

	// The reference values are uploaded in the 0..255 range, not normalized.
	if (alphaColorRefDirty)
		PSSetColorUniform3Alpha255(CONST_PS_ALPHACOLORREF, gstate.getColorTestRef(), gstate.getAlphaTestRef());

	if (alphaColorMaskDirty)
		PSSetColorUniform3(CONST_PS_ALPHACOLORMASK, gstate.colortestmask);

	if (fogColorDirty)
		PSSetColorUniform3(CONST_PS_FOGCOLOR, gstate.fogcolor);
}
void LinkedShaderDX9::stop() {
}
void LinkedShaderDX9::updateUniforms() {
if (!dirtyUniforms)
return;
void ShaderManagerDX9::VSUpdateUniforms(int dirtyUniforms) {
// Update any dirty uniforms before we draw
if (u_proj != 0 && (dirtyUniforms & DIRTY_PROJMATRIX)) {
if (dirtyUniforms & DIRTY_PROJMATRIX) {
Matrix4x4 flippedMatrix;
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
if (gstate_c.vpHeight < 0) {
@ -321,79 +242,33 @@ void LinkedShaderDX9::updateUniforms() {
bool invert = gstate_c.vpDepth < 0;
ConvertProjMatrixToD3D(flippedMatrix, invert);
SetMatrix(u_proj, flippedMatrix.getReadPtr());
VSSetMatrix(CONST_VS_PROJ, flippedMatrix.getReadPtr());
}
if (u_proj_through != 0 && (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX))
{
if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {
Matrix4x4 proj_through;
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
ConvertProjMatrixToD3D(proj_through, false);
SetMatrix(u_proj_through, proj_through.getReadPtr());
VSSetMatrix(CONST_VS_PROJ_THROUGH, proj_through.getReadPtr());
}
if (u_texenv != 0 && (dirtyUniforms & DIRTY_TEXENV)) {
SetColorUniform3(u_texenv, gstate.texenvcolor);
// Transform
if (dirtyUniforms & DIRTY_WORLDMATRIX) {
VSSetMatrix4x3(CONST_VS_WORLD, gstate.worldMatrix);
}
if (u_alphacolorref != 0 && (dirtyUniforms & DIRTY_ALPHACOLORREF)) {
SetColorUniform3Alpha255(u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef());
if (dirtyUniforms & DIRTY_VIEWMATRIX) {
VSSetMatrix4x3(CONST_VS_VIEW, gstate.viewMatrix);
}
if (u_alphacolormask != 0 && (dirtyUniforms & DIRTY_ALPHACOLORMASK)) {
SetColorUniform3(u_alphacolormask, gstate.colortestmask);
if (dirtyUniforms & DIRTY_TEXMATRIX) {
VSSetMatrix4x3(CONST_VS_TEXMTX, gstate.tgenMatrix);
}
if (u_fogcolor != 0 && (dirtyUniforms & DIRTY_FOGCOLOR)) {
SetColorUniform3(u_fogcolor, gstate.fogcolor);
}
if (u_fogcoef != 0 && (dirtyUniforms & DIRTY_FOGCOEF)) {
if (dirtyUniforms & DIRTY_FOGCOEF) {
const float fogcoef[2] = {
getFloat24(gstate.fog1),
getFloat24(gstate.fog2),
};
SetFloatArray(u_fogcoef, fogcoef, 2);
VSSetFloatArray(CONST_VS_FOGCOEF, fogcoef, 2);
}
// Texturing
if (u_uvscaleoffset != 0 && (dirtyUniforms & DIRTY_UVSCALEOFFSET)) {
float uvscaleoff[4];
if (gstate.isModeThrough()) {
// We never get here because we don't use HW transform with through mode.
// Although - why don't we?
uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth;
uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight;
uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth;
uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight;
} else {
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
float widthFactor = (float)w / (float)gstate_c.curTextureWidth;
float heightFactor = (float)h / (float)gstate_c.curTextureHeight;
// Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works.
if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN) {
uvscaleoff[0] = gstate_c.uv.uScale * widthFactor;
uvscaleoff[1] = gstate_c.uv.vScale * heightFactor;
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
} else {
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;
uvscaleoff[2] = 0.0f;
uvscaleoff[3] = 0.0f;
}
}
SetFloatArray(u_uvscaleoffset, uvscaleoff, 4);
}
// Transform
if (u_world != 0 && (dirtyUniforms & DIRTY_WORLDMATRIX)) {
SetMatrix4x3(u_world, gstate.worldMatrix);
}
if (u_view != 0 && (dirtyUniforms & DIRTY_VIEWMATRIX)) {
SetMatrix4x3(u_view, gstate.viewMatrix);
}
if (u_texmtx != 0 && (dirtyUniforms & DIRTY_TEXMATRIX)) {
SetMatrix4x3(u_texmtx, gstate.tgenMatrix);
}
// TODO: Could even set all bones in one go if they're all dirty.
#ifdef USE_BONE_ARRAY
if (u_bone != 0) {
@ -420,66 +295,89 @@ void LinkedShaderDX9::updateUniforms() {
}
}
#else
float bonetemp[16];
for (int i = 0; i < numBones; i++) {
for (int i = 0; i < 8; i++) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(bonetemp, gstate.boneMatrix + 12 * i);
if (u_bone[i] != 0)
SetMatrix(u_bone[i], bonetemp);
VSSetMatrix4x3(CONST_VS_BONE0 + 4 * i, gstate.boneMatrix + 12 * i);
}
}
#endif
// Texturing
if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {
float uvscaleoff[4];
if (gstate.isModeThrough()) {
// We never get here because we don't use HW transform with through mode.
// Although - why don't we?
uvscaleoff[0] = gstate_c.uv.uScale / gstate_c.curTextureWidth;
uvscaleoff[1] = gstate_c.uv.vScale / gstate_c.curTextureHeight;
uvscaleoff[2] = gstate_c.uv.uOff / gstate_c.curTextureWidth;
uvscaleoff[3] = gstate_c.uv.vOff / gstate_c.curTextureHeight;
} else {
int w = gstate.getTextureWidth(0);
int h = gstate.getTextureHeight(0);
float widthFactor = (float)w / (float)gstate_c.curTextureWidth;
float heightFactor = (float)h / (float)gstate_c.curTextureHeight;
// Not sure what GE_TEXMAP_UNKNOWN is, but seen in Riviera. Treating the same as GE_TEXMAP_TEXTURE_COORDS works.
if (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN) {
uvscaleoff[0] = gstate_c.uv.uScale * widthFactor;
uvscaleoff[1] = gstate_c.uv.vScale * heightFactor;
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
} else {
uvscaleoff[0] = widthFactor;
uvscaleoff[1] = heightFactor;
uvscaleoff[2] = 0.0f;
uvscaleoff[3] = 0.0f;
}
}
VSSetFloatArray(CONST_VS_UVSCALEOFFSET, uvscaleoff, 4);
}
// Lighting
if (u_ambient != 0 && (dirtyUniforms & DIRTY_AMBIENT)) {
SetColorUniform3Alpha(u_ambient, gstate.ambientcolor, gstate.getAmbientA());
if (dirtyUniforms & DIRTY_AMBIENT) {
VSSetColorUniform3Alpha(CONST_VS_AMBIENT, gstate.ambientcolor, gstate.getAmbientA());
}
if (u_matambientalpha != 0 && (dirtyUniforms & DIRTY_MATAMBIENTALPHA)) {
SetColorUniform3Alpha(u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA());
if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {
VSSetColorUniform3Alpha(CONST_VS_MATAMBIENTALPHA, gstate.materialambient, gstate.getMaterialAmbientA());
}
if (u_matdiffuse != 0 && (dirtyUniforms & DIRTY_MATDIFFUSE)) {
SetColorUniform3(u_matdiffuse, gstate.materialdiffuse);
if (dirtyUniforms & DIRTY_MATDIFFUSE) {
VSSetColorUniform3(CONST_VS_MATDIFFUSE, gstate.materialdiffuse);
}
if (u_matemissive != 0 && (dirtyUniforms & DIRTY_MATEMISSIVE)) {
SetColorUniform3(u_matemissive, gstate.materialemissive);
if (dirtyUniforms & DIRTY_MATEMISSIVE) {
VSSetColorUniform3(CONST_VS_MATEMISSIVE, gstate.materialemissive);
}
if (u_matspecular != 0 && (dirtyUniforms & DIRTY_MATSPECULAR)) {
SetColorUniform3ExtraFloat(u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
if (dirtyUniforms & DIRTY_MATSPECULAR) {
VSSetColorUniform3ExtraFloat(CONST_VS_MATSPECULAR, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
}
for (int i = 0; i < 4; i++) {
if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {
if (u_lightpos[i] != 0) {
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = getFloat24(gstate.lpos[i * 3 + 0]);
float y = getFloat24(gstate.lpos[i * 3 + 1]);
float z = getFloat24(gstate.lpos[i * 3 + 2]);
float len = sqrtf(x*x + y*y + z*z);
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
SetFloatArray(u_lightpos[i], vec, 3);
} else {
SetFloat24Uniform3(u_lightpos[i], &gstate.lpos[i * 3]);
}
if (gstate.isDirectionalLight(i)) {
// Prenormalize
float x = getFloat24(gstate.lpos[i * 3 + 0]);
float y = getFloat24(gstate.lpos[i * 3 + 1]);
float z = getFloat24(gstate.lpos[i * 3 + 2]);
float len = sqrtf(x*x + y*y + z*z);
if (len == 0.0f)
len = 1.0f;
else
len = 1.0f / len;
float vec[3] = { x * len, y * len, z * len };
VSSetFloatArray(CONST_VS_LIGHTPOS + i, vec, 3);
} else {
VSSetFloat24Uniform3(CONST_VS_LIGHTPOS + i, &gstate.lpos[i * 3]);
}
if (u_lightdir[i] != 0) SetFloat24Uniform3(u_lightdir[i], &gstate.ldir[i * 3]);
if (u_lightatt[i] != 0) SetFloat24Uniform3(u_lightatt[i], &gstate.latt[i * 3]);
if (u_lightangle[i] != 0) SetFloat(u_lightangle[i], getFloat24(gstate.lcutoff[i]));
if (u_lightspotCoef[i] != 0) SetFloat(u_lightspotCoef[i], getFloat24(gstate.lconv[i]));
if (u_lightambient[i] != 0) SetColorUniform3(u_lightambient[i], gstate.lcolor[i * 3]);
if (u_lightdiffuse[i] != 0) SetColorUniform3(u_lightdiffuse[i], gstate.lcolor[i * 3 + 1]);
if (u_lightspecular[i] != 0) SetColorUniform3(u_lightspecular[i], gstate.lcolor[i * 3 + 2]);
VSSetFloat24Uniform3(CONST_VS_LIGHTDIR + i, &gstate.ldir[i * 3]);
VSSetFloat24Uniform3(CONST_VS_LIGHTATT + i, &gstate.latt[i * 3]);
VSSetFloat(CONST_VS_LIGHTANGLE + i, getFloat24(gstate.lcutoff[i]));
VSSetFloat(CONST_VS_LIGHTSPOTCOEF + i, getFloat24(gstate.lconv[i]));
VSSetColorUniform3(CONST_VS_LIGHTAMBIENT + i, gstate.lcolor[i * 3]);
VSSetColorUniform3(CONST_VS_LIGHTDIFFUSE + i, gstate.lcolor[i * 3 + 1]);
VSSetColorUniform3(CONST_VS_LIGHTSPECULAR + i, gstate.lcolor[i * 3 + 2]);
}
}
dirtyUniforms = 0;
}
ShaderManagerDX9::ShaderManagerDX9() : lastShader_(NULL), globalDirty_(0xFFFFFFFF), shaderSwitchDirty_(0) {
ShaderManagerDX9::ShaderManagerDX9() : lastVShader_(nullptr), lastPShader_(nullptr), globalDirty_(0xFFFFFFFF) {
codeBuffer_ = new char[16384];
}
@ -488,16 +386,12 @@ ShaderManagerDX9::~ShaderManagerDX9() {
}
void ShaderManagerDX9::Clear() {
for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
delete iter->ls;
}
for (auto iter = fsCache_.begin(); iter != fsCache_.end(); ++iter) {
delete iter->second;
}
for (auto iter = vsCache_.begin(); iter != vsCache_.end(); ++iter) {
delete iter->second;
}
linkedShaderCache_.clear();
fsCache_.clear();
vsCache_.clear();
globalDirty_ = 0xFFFFFFFF;
@ -515,53 +409,41 @@ void ShaderManagerDX9::DirtyShader() {
// Forget the last shader ID
lastFSID_.clear();
lastVSID_.clear();
lastShader_ = 0;
lastVShader_ = nullptr;
lastPShader_ = nullptr;
globalDirty_ = 0xFFFFFFFF;
shaderSwitchDirty_ = 0;
}
void ShaderManagerDX9::DirtyLastShader() { // disables vertex arrays
if (lastShader_)
lastShader_->stop();
lastShader_ = 0;
lastVShader_ = nullptr;
lastPShader_ = nullptr;
}
LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim, u32 vertType) {
if (globalDirty_) {
if (lastShader_)
lastShader_->dirtyUniforms |= globalDirty_;
shaderSwitchDirty_ |= globalDirty_;
globalDirty_ = 0;
}
VSShader *ShaderManagerDX9::ApplyShader(int prim, u32 vertType) {
bool useHWTransform = CanUseHardwareTransformDX9(prim);
VertexShaderIDDX9 VSID;
FragmentShaderIDDX9 FSID;
ComputeVertexShaderIDDX9(&VSID, vertType, prim, useHWTransform);
FragmentShaderIDDX9 FSID;
ComputeFragmentShaderIDDX9(&FSID);
// Just update uniforms if this is the same shader as last time.
if (lastShader_ != 0 && VSID == lastVSID_ && FSID == lastFSID_) {
lastShader_->updateUniforms();
return lastShader_; // Already all set.
if (lastVShader_ != nullptr && lastPShader_ != nullptr && VSID == lastVSID_ && FSID == lastFSID_) {
if (globalDirty_) {
PSUpdateUniforms(globalDirty_);
VSUpdateUniforms(globalDirty_);
globalDirty_ = 0;
}
return lastVShader_; // Already all set.
}
if (lastShader_ != 0) {
// There was a previous shader and we're switching.
lastShader_->stop();
}
lastVSID_ = VSID;
lastFSID_ = FSID;
VSCache::iterator vsIter = vsCache_.find(VSID);
VSShader *vs;
if (vsIter == vsCache_.end()) {
// Vertex shader not in cache. Let's compile it.
GenerateVertexShaderDX9(prim, codeBuffer_, useHWTransform);
vs = new VSShader(codeBuffer_, useHWTransform);
vs = new VSShader(codeBuffer_, vertType, useHWTransform);
if (vs->Failed()) {
ERROR_LOG(HLE, "Shader compilation failed, falling back to software transform");
@ -574,13 +456,14 @@ LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim, u32 vertType) {
// Can still work with software transform.
GenerateVertexShaderDX9(prim, codeBuffer_, false);
vs = new VSShader(codeBuffer_, false);
vs = new VSShader(codeBuffer_, vertType, false);
}
vsCache_[VSID] = vs;
} else {
vs = vsIter->second;
}
lastVSID_ = VSID;
FSCache::iterator fsIter = fsCache_.find(FSID);
PSShader *fs;
@ -593,34 +476,20 @@ LinkedShaderDX9 *ShaderManagerDX9::ApplyShader(int prim, u32 vertType) {
fs = fsIter->second;
}
// Okay, we have both shaders. Let's see if there's a linked one.
LinkedShaderDX9 *ls = NULL;
lastFSID_ = FSID;
for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
// Deferred dirtying! Let's see if we can make this even more clever later.
iter->ls->dirtyUniforms |= shaderSwitchDirty_;
if (iter->vs == vs && iter->fs == fs) {
ls = iter->ls;
}
}
shaderSwitchDirty_ = 0;
if (ls == NULL) {
ls = new LinkedShaderDX9(vs, fs, vertType, vs->UseHWTransform()); // This does "use" automatically
const LinkedShaderCacheEntry entry(vs, fs, ls);
linkedShaderCache_.push_back(entry);
} else {
// If shader changed we need to update all uniforms
if (lastShader_ != ls) {
ls->dirtyUniforms = DIRTY_ALL;
}
ls->use();
if (globalDirty_) {
PSUpdateUniforms(globalDirty_);
VSUpdateUniforms(globalDirty_);
globalDirty_ = 0;
}
lastShader_ = ls;
return ls;
pD3Ddevice->SetPixelShader(fs->shader);
pD3Ddevice->SetVertexShader(vs->shader);
lastPShader_ = fs;
lastVShader_ = vs;
return vs;
}
};
} // namespace

View File

@ -29,114 +29,19 @@ namespace DX9 {
class PSShader;
class VSShader;
void ConvertProjMatrixToD3D(Matrix4x4 & in);
// Pre-fetched attrs and uniforms
// Pretty much full. Will need more bits for more fine grained dirty tracking for lights.
// Indices of the vertex attributes a shader may consume. LinkedShaderDX9's
// attrMask is documented as a bitmask built from (1 << ATTR_*) values.
enum {
ATTR_POSITION = 0,
ATTR_TEXCOORD = 1,
ATTR_NORMAL = 2,
ATTR_W1 = 3,
ATTR_W2 = 4,
ATTR_COLOR0 = 5,
ATTR_COLOR1 = 6,
// Number of attribute indices above; keep last.
ATTR_COUNT,
};
// A vertex shader / pixel shader pair together with the constant-table handle
// of every uniform the pair may declare, plus a per-pair dirty mask that
// decides which uniforms updateUniforms() re-uploads.
class LinkedShaderDX9
{
protected:
// Helper
// Looks a constant up by name in the pixel shader's table, then the vertex shader's.
D3DXHANDLE GetConstantByName(LPCSTR pName);
// Uniform upload helpers. Each takes a handle obtained from GetConstantByName.
void SetMatrix4x3(D3DXHANDLE uniform, const float *m4x3);
void SetColorUniform3(D3DXHANDLE uniform, u32 color);
void SetColorUniform3ExtraFloat(D3DXHANDLE uniform, u32 color, float extra);
void SetColorUniform3Alpha(D3DXHANDLE uniform, u32 color, u8 alpha);
void SetColorUniform3Alpha255(D3DXHANDLE uniform, u32 color, u8 alpha);
void SetMatrix(D3DXHANDLE uniform, const float* pMatrix);
void SetFloatArray(D3DXHANDLE uniform, const float* pArray, int len);
void SetFloat(D3DXHANDLE uniform, float value);
void SetFloat24Uniform3(D3DXHANDLE uniform, const u32 data[3]);
public:
LinkedShaderDX9(VSShader *vs, PSShader *fs, u32 vertType, bool useHWTransform);
~LinkedShaderDX9();
// Uploads dirty uniforms and binds both shaders on the device.
void use();
void stop();
// Re-uploads the uniforms flagged in dirtyUniforms.
void updateUniforms();
// Whether this pair was built for hardware transform (copied from the constructor argument).
// NOTE(review): the original remark about Mali-400 appears to be inherited from the OpenGL backend.
bool useHWTransform_;
VSShader *m_vs;
PSShader *m_fs;
// Bitmask of DIRTY_* flags naming the uniforms that still need uploading.
u32 dirtyUniforms;
// Present attributes in the shader.
int attrMask; // 1 << ATTR_ ... or-ed together.
// Pre-fetched attrs and uniforms
// NOTE(review): the constructor shown in this change never assigns attrMask or
// any a_* handle; they look unused - confirm before relying on them.
D3DXHANDLE a_position;
D3DXHANDLE a_color0;
D3DXHANDLE a_color1;
D3DXHANDLE a_texcoord;
D3DXHANDLE a_normal;
D3DXHANDLE a_weight0123;
D3DXHANDLE a_weight4567;
D3DXHANDLE u_tex;
D3DXHANDLE u_proj;
D3DXHANDLE u_proj_through;
D3DXHANDLE u_texenv;
D3DXHANDLE u_view;
D3DXHANDLE u_texmtx;
D3DXHANDLE u_world;
#ifdef USE_BONE_ARRAY
D3DXHANDLE u_bone; // array, size is numBones
#else
D3DXHANDLE u_bone[8];
#endif
// Bone count derived from the vertex type in the constructor; 0 when the vertex type has no weights.
int numBones;
// Fragment processing inputs
D3DXHANDLE u_alphacolorref;
D3DXHANDLE u_alphacolormask;
D3DXHANDLE u_fogcolor;
D3DXHANDLE u_fogcoef;
// Texturing
D3DXHANDLE u_uvscaleoffset;
// Lighting
D3DXHANDLE u_ambient;
D3DXHANDLE u_matambientalpha;
D3DXHANDLE u_matdiffuse;
D3DXHANDLE u_matspecular;
D3DXHANDLE u_matemissive;
// Per-light handles, indexed by light number (0..3).
D3DXHANDLE u_lightpos[4];
D3DXHANDLE u_lightdir[4];
D3DXHANDLE u_lightatt[4]; // attenuation
D3DXHANDLE u_lightangle[4]; // spotlight cone angle (cosine)
D3DXHANDLE u_lightspotCoef[4]; // spotlight dropoff
D3DXHANDLE u_lightdiffuse[4]; // diffuse color, fed from gstate.lcolor[i * 3 + 1]
D3DXHANDLE u_lightspecular[4]; // specular color, fed from gstate.lcolor[i * 3 + 2]
D3DXHANDLE u_lightambient[4]; // ambient color, fed from gstate.lcolor[i * 3]
};
// Will reach 32 bits soon :P
enum
{
DIRTY_PROJMATRIX = (1 << 0),
DIRTY_PROJTHROUGHMATRIX = (1 << 1),
DIRTY_FOGCOLOR = (1 << 2),
DIRTY_FOGCOEF = (1 << 3),
DIRTY_TEXENV = (1 << 4),
DIRTY_ALPHACOLORREF = (1 << 5),
DIRTY_COLORREF = (1 << 6),
DIRTY_ALPHACOLORMASK = (1 << 7),
DIRTY_FOGCOLOR = (1 << 2),
DIRTY_FOGCOEF = (1 << 3),
DIRTY_TEXENV = (1 << 4),
DIRTY_ALPHACOLORREF = (1 << 5),
DIRTY_COLORREF = (1 << 6),
DIRTY_ALPHACOLORMASK = (1 << 7),
DIRTY_LIGHT0 = (1 << 8),
DIRTY_LIGHT1 = (1 << 9),
DIRTY_LIGHT2 = (1 << 10),
@ -178,7 +83,7 @@ public:
bool UseHWTransform() const { return useHWTransform_; }
LPDIRECT3DPIXELSHADER9 shader;
LPD3DXCONSTANTTABLE constant;
protected:
std::string source_;
bool failed_;
@ -187,7 +92,7 @@ protected:
class VSShader {
public:
VSShader(const char *code, bool useHWTransform);
VSShader(const char *code, int vertType, bool useHWTransform);
~VSShader();
const std::string &source() const { return source_; }
@ -196,7 +101,7 @@ public:
bool UseHWTransform() const { return useHWTransform_; }
LPDIRECT3DVERTEXSHADER9 shader;
LPD3DXCONSTANTTABLE constant;
protected:
std::string source_;
bool failed_;
@ -210,46 +115,47 @@ public:
~ShaderManagerDX9();
void ClearCache(bool deleteThem); // TODO: deleteThem currently not respected
LinkedShaderDX9 *ApplyShader(int prim, u32 vertType);
VSShader *ApplyShader(int prim, u32 vertType);
void DirtyShader();
void DirtyUniform(u32 what) {
globalDirty_ |= what;
}
void DirtyLastShader(); // disables vertex arrays
void DirtyLastShader();
int NumVertexShaders() const { return (int)vsCache_.size(); }
int NumFragmentShaders() const { return (int)fsCache_.size(); }
int NumPrograms() const { return (int)linkedShaderCache_.size(); }
private:
void PSUpdateUniforms(int dirtyUniforms);
void VSUpdateUniforms(int dirtyUniforms);
void PSSetColorUniform3Alpha255(int creg, u32 color, u8 alpha);
void PSSetColorUniform3(int creg, u32 color);
void VSSetMatrix4x3(int creg, const float *m4x3);
void VSSetColorUniform3(int creg, u32 color);
void VSSetColorUniform3ExtraFloat(int creg, u32 color, float extra);
void VSSetColorUniform3Alpha(int creg, u32 color, u8 alpha);
void VSSetMatrix(int creg, const float* pMatrix);
void VSSetFloat(int creg, float value);
void VSSetFloatArray(int creg, const float *value, int count);
void VSSetFloat24Uniform3(int creg, const u32 data[3]);
void Clear();
// One entry of the linked-shader cache: the vertex / pixel shader pair and the
// LinkedShaderDX9 that was built for it. Holds raw pointers only.
struct LinkedShaderCacheEntry {
	VSShader *vs;
	PSShader *fs;
	LinkedShaderDX9 *ls;

	LinkedShaderCacheEntry(VSShader *vertexShader, PSShader *pixelShader, LinkedShaderDX9 *linked)
		: vs(vertexShader), fs(pixelShader), ls(linked) {
	}
};
typedef std::vector<LinkedShaderCacheEntry> LinkedShaderCache;
LinkedShaderCache linkedShaderCache_;
FragmentShaderIDDX9 lastFSID_;
VertexShaderIDDX9 lastVSID_;
LinkedShaderDX9 *lastShader_;
u32 globalDirty_;
u32 shaderSwitchDirty_;
char *codeBuffer_;
VSShader *lastVShader_;
PSShader *lastPShader_;
typedef std::map<FragmentShaderIDDX9, PSShader *> FSCache;
FSCache fsCache_;
typedef std::map<VertexShaderIDDX9, VSShader *> VSCache;
VSCache vsCache_;
};
};

View File

@ -271,7 +271,7 @@ static void LogDecFmtForDraw(const DecVtxFormat &decFmt) {
//pD3Ddevice->SetRenderState(D3DRS_FILLMODE, D3DFILL_WIREFRAME);
}
IDirect3DVertexDeclaration9 *TransformDrawEngineDX9::SetupDecFmtForDraw(LinkedShaderDX9 *program, const DecVtxFormat &decFmt, u32 pspFmt) {
IDirect3DVertexDeclaration9 *TransformDrawEngineDX9::SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt) {
auto vertexDeclCached = vertexDeclMap_.find(pspFmt);
if (vertexDeclCached == vertexDeclMap_.end()) {
@ -281,12 +281,12 @@ IDirect3DVertexDeclaration9 *TransformDrawEngineDX9::SetupDecFmtForDraw(LinkedSh
// Vertices Elements orders
// WEIGHT
if (decFmt.w0fmt != 0) {
VertexAttribSetup(VertexElement, decFmt.w0fmt, decFmt.w0off, D3DDECLUSAGE_BLENDWEIGHT, 0);
VertexAttribSetup(VertexElement, decFmt.w0fmt, decFmt.w0off, D3DDECLUSAGE_TEXCOORD, 1);
VertexElement++;
}
if (decFmt.w1fmt != 0) {
VertexAttribSetup(VertexElement, decFmt.w1fmt, decFmt.w1off, D3DDECLUSAGE_BLENDWEIGHT, 1);
VertexAttribSetup(VertexElement, decFmt.w1fmt, decFmt.w1off, D3DDECLUSAGE_TEXCOORD, 2);
VertexElement++;
}
@ -437,7 +437,7 @@ bool TransformDrawEngineDX9::IsReallyAClear(int numVerts) const {
// Actually again, single quads could be drawn more efficiently using GL_TRIANGLE_STRIP, no need to duplicate verts as for
// GL_TRIANGLES. Still need to sw transform to compute the extra two corners though.
void TransformDrawEngineDX9::SoftwareTransformAndDraw(
int prim, u8 *decoded, LinkedShaderDX9 *program, int vertexCount, u32 vertType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) {
int prim, u8 *decoded, int vertexCount, u32 vertType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex) {
bool throughmode = (vertType & GE_VTYPE_THROUGH_MASK) != 0;
bool lmode = gstate.isUsingSecondaryColor() && gstate.isLightingEnabled();
@ -1057,9 +1057,9 @@ void TransformDrawEngineDX9::DoFlush() {
GEPrimitiveType prim = prevPrim_;
ApplyDrawState(prim);
LinkedShaderDX9 *program = shaderManager_->ApplyShader(prim, lastVType_);
VSShader *vshader = shaderManager_->ApplyShader(prim, lastVType_);
if (program->useHWTransform_) {
if (vshader->UseHWTransform()) {
LPDIRECT3DVERTEXBUFFER9 vb_ = NULL;
LPDIRECT3DINDEXBUFFER9 ib_ = NULL;
@ -1233,7 +1233,7 @@ rotateVBO:
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
}
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(program, dec_->GetDecVtxFmt(), dec_->VertexType());
IDirect3DVertexDeclaration9 *pHardwareVertexDecl = SetupDecFmtForDraw(vshader, dec_->GetDecVtxFmt(), dec_->VertexType());
if (pHardwareVertexDecl) {
pD3Ddevice->SetVertexDeclaration(pHardwareVertexDecl);
@ -1272,7 +1272,7 @@ rotateVBO:
DEBUG_LOG(G3D, "Flush prim %i SW! %i verts in one go", prim, indexGen.VertexCount());
SoftwareTransformAndDraw(
prim, decoded, program, indexGen.VertexCount(),
prim, decoded, indexGen.VertexCount(),
dec_->VertexType(), (void *)decIndex, GE_VTYPE_IDX_16BIT, dec_->GetDecVtxFmt(),
indexGen.MaxIndex());
}

View File

@ -28,7 +28,7 @@ struct DecVtxFormat;
namespace DX9 {
class LinkedShaderDX9;
class VSShader;
class ShaderManagerDX9;
class TextureCacheDX9;
class FramebufferManagerDX9;
@ -144,10 +144,10 @@ public:
private:
void DoFlush();
void SoftwareTransformAndDraw(int prim, u8 *decoded, LinkedShaderDX9 *program, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
void SoftwareTransformAndDraw(int prim, u8 *decoded, int vertexCount, u32 vertexType, void *inds, int indexType, const DecVtxFormat &decVtxFormat, int maxIndex);
void ApplyDrawState(int prim);
bool IsReallyAClear(int numVerts) const;
IDirect3DVertexDeclaration9 *SetupDecFmtForDraw(LinkedShaderDX9 *program, const DecVtxFormat &decFmt, u32 pspFmt);
IDirect3DVertexDeclaration9 *SetupDecFmtForDraw(VSShader *vshader, const DecVtxFormat &decFmt, u32 pspFmt);
// Preprocessing for spline/bezier
u32 NormalizeVertices(u8 *outPtr, u8 *bufPtr, const u8 *inPtr, VertexDecoderDX9 *dec, int lowerBound, int upperBound, u32 vertType);

View File

@ -112,14 +112,14 @@ void ComputeVertexShaderIDDX9(VertexShaderIDDX9 *id, u32 vertType, int prim, boo
static const char * const boneWeightAttrDecl[9] = {
"#ERROR#",
"float a_w1 :BLENDWEIGHT0;\n",
"float2 a_w1:BLENDWEIGHT0;\n",
"float3 a_w1:BLENDWEIGHT0;\n",
"float4 a_w1:BLENDWEIGHT0;\n",
"float4 a_w1:BLENDWEIGHT0;\n float a_w2 :BLENDWEIGHT1;\n",
"float4 a_w1:BLENDWEIGHT0;\n float2 a_w2:BLENDWEIGHT1;\n",
"float4 a_w1:BLENDWEIGHT0;\n float3 a_w2:BLENDWEIGHT1;\n",
"float4 a_w1:BLENDWEIGHT0;\n float4 a_w2:BLENDWEIGHT1;\n",
"float a_w1:TEXCOORD1;\n",
"float2 a_w1:TEXCOORD1;\n",
"float3 a_w1:TEXCOORD1;\n",
"float4 a_w1:TEXCOORD1;\n",
"float4 a_w1:TEXCOORD1;\n float a_w2:TEXCOORD2;\n",
"float4 a_w1:TEXCOORD1;\n float2 a_w2:TEXCOORD2;\n",
"float4 a_w1:TEXCOORD1;\n float3 a_w2:TEXCOORD2;\n",
"float4 a_w1:TEXCOORD1;\n float4 a_w2:TEXCOORD2;\n",
};
enum DoLightComputation {
@ -161,127 +161,119 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
WRITE(p, "#pragma warning( disable : 3571 )\n");
if (gstate.isModeThrough()) {
WRITE(p, "float4x4 u_proj_through;\n");
WRITE(p, "float4x4 u_proj_through : register(c%i);\n", CONST_VS_PROJ_THROUGH);
} else {
WRITE(p, "float4x4 u_proj;\n");
WRITE(p, "float4x4 u_proj : register(c%i);\n", CONST_VS_PROJ);
// Add all the uniforms we'll need to transform properly.
}
if (enableFog) {
WRITE(p, "float2 u_fogcoef;\n");
WRITE(p, "float2 u_fogcoef : register(c%i);\n", CONST_VS_FOGCOEF);
}
if (useHWTransform || !hasColor)
WRITE(p, "float4 u_matambientalpha;\n"); // matambient + matalpha
WRITE(p, "float4 u_matambientalpha : register(c%i);\n", CONST_VS_MATAMBIENTALPHA); // matambient + matalpha
if (useHWTransform) {
// When transforming by hardware, we need a great deal more uniforms...
WRITE(p, "float4x4 u_world;\n");
WRITE(p, "float4x4 u_view;\n");
WRITE(p, "float4x4 u_world : register(c%i);\n", CONST_VS_WORLD);
WRITE(p, "float4x4 u_view : register(c%i);\n", CONST_VS_VIEW);
if (gstate.getUVGenMode() == 1)
WRITE(p, "float4x4 u_texmtx;\n");
if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) {
WRITE(p, "float4x4 u_texmtx : register(c%i);\n", CONST_VS_TEXMTX);
if (vertTypeIsSkinningEnabled(vertType)) {
int numBones = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
#ifdef USE_BONE_ARRAY
WRITE(p, "float4x4 u_bone[%i];\n", numBones);
WRITE(p, "float4x4 u_bone[%i] : register(c%i);\n", numBones, CONST_VS_BONE0);
#else
for (int i = 0; i < numBones; i++) {
WRITE(p, "float4x4 u_bone%i;\n", i);
WRITE(p, "float4x4 u_bone%i : register(c%i);\n", i, CONST_VS_BONE0 + i * 4);
}
#endif
}
if (doTexture) {
WRITE(p, "float4 u_uvscaleoffset;\n");
WRITE(p, "float4 u_uvscaleoffset : register(c%i);\n", CONST_VS_UVSCALEOFFSET);
}
for (int i = 0; i < 4; i++) {
if (doLight[i] != LIGHT_OFF) {
// This is needed for shade mapping
WRITE(p, "float3 u_lightpos%i;\n", i);
WRITE(p, "float3 u_lightpos%i : register(c%i);\n", i, CONST_VS_LIGHTPOS + i);
}
if (doLight[i] == LIGHT_FULL) {
GELightType type = gstate.getLightType(i);
if (type != GE_LIGHTTYPE_DIRECTIONAL)
WRITE(p, "float3 u_lightatt%i;\n", i);
WRITE(p, "float3 u_lightatt%i : register(c%i);\n", i, CONST_VS_LIGHTATT + i);
if (type == GE_LIGHTTYPE_SPOT || type == GE_LIGHTTYPE_UNKNOWN) {
WRITE(p, "float3 u_lightdir%i;\n", i);
WRITE(p, "float u_lightangle%i;\n", i);
WRITE(p, "float u_lightspotCoef%i;\n", i);
WRITE(p, "float3 u_lightdir%i : register(c%i);\n", i, CONST_VS_LIGHTDIR + i);
WRITE(p, "float u_lightangle%i : register(c%i);\n", i, CONST_VS_LIGHTANGLE + i);
WRITE(p, "float u_lightspotCoef%i : register(c%i);\n", i, CONST_VS_LIGHTSPOTCOEF + i);
}
WRITE(p, "float3 u_lightambient%i;\n", i);
WRITE(p, "float3 u_lightdiffuse%i;\n", i);
WRITE(p, "float3 u_lightambient%i : register(c%i);\n", i, CONST_VS_LIGHTAMBIENT + i);
WRITE(p, "float3 u_lightdiffuse%i : register(c%i);\n", i, CONST_VS_LIGHTDIFFUSE + i);
if (gstate.isUsingSpecularLight(i))
WRITE(p, "float3 u_lightspecular%i;\n", i);
WRITE(p, "float3 u_lightspecular%i : register(c%i);\n", i, CONST_VS_LIGHTSPECULAR + i);
}
}
if (gstate.isLightingEnabled()) {
WRITE(p, "float4 u_ambient;\n");
WRITE(p, "float4 u_ambient : register(c%i);\n", CONST_VS_AMBIENT);
if ((gstate.materialupdate & 2) == 0 || !hasColor)
WRITE(p, "float3 u_matdiffuse;\n");
WRITE(p, "float3 u_matdiffuse : register(c%i);\n", CONST_VS_MATDIFFUSE);
// if ((gstate.materialupdate & 4) == 0)
WRITE(p, "float4 u_matspecular;\n"); // Specular coef is contained in alpha
WRITE(p, "float3 u_matemissive;\n");
WRITE(p, "float4 u_matspecular : register(c%i);\n", CONST_VS_MATSPECULAR); // Specular coef is contained in alpha
WRITE(p, "float3 u_matemissive : register(c%i);\n", CONST_VS_MATEMISSIVE);
}
}
if (useHWTransform) {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
if (vertTypeGetWeightMask(vertType) != GE_VTYPE_WEIGHT_NONE) {
WRITE(p, "struct VS_IN { \n");
if (vertTypeIsSkinningEnabled(vertType)) {
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType))]);
}
if (doTexture && hasTexcoord) {
if (doTextureProjection)
WRITE(p, " float3 texcoord: TEXCOORD0; \n");
WRITE(p, " float3 texcoord : TEXCOORD0;\n");
else
WRITE(p, " float2 texcoord: TEXCOORD0; \n");
WRITE(p, " float2 texcoord : TEXCOORD0;\n");
}
if (hasColor) {
WRITE(p, " float4 color0: COLOR0; \n");
WRITE(p, " float4 color0 : COLOR0;\n");
}
if (hasNormal) {
WRITE(p, " float3 normal: NORMAL; \n");
WRITE(p, " float3 normal : NORMAL;\n");
}
WRITE(p, " float3 position: POSITION; \n");
WRITE(p, " }; \n");
WRITE(p, " \n");
WRITE(p, " float3 position : POSITION;\n");
WRITE(p, "};\n");
} else {
WRITE(p, " struct VS_IN \n");
WRITE(p, " \n");
WRITE(p, " { \n");
WRITE(p, " float4 position : POSITION; \n");
WRITE(p, " float3 texcoord : TEXCOORD0; \n");
WRITE(p, " float4 color0 : COLOR0; \n");
WRITE(p, "struct VS_IN {\n");
WRITE(p, " float4 position : POSITION;\n");
WRITE(p, " float3 texcoord : TEXCOORD0;\n");
WRITE(p, " float4 color0 : COLOR0;\n");
// only software transform supplies color1 as vertex data
WRITE(p, " float4 color1 : COLOR1; \n");
WRITE(p, " }; \n");
WRITE(p, " float4 color1 : COLOR1;\n");
WRITE(p, "};\n");
}
WRITE(p, " struct VS_OUT \n");
WRITE(p, " { \n");
WRITE(p, " float4 gl_Position : POSITION; \n");
WRITE(p, "struct VS_OUT {\n");
WRITE(p, " float4 gl_Position : POSITION;\n");
if (doTexture) {
if (doTextureProjection)
WRITE(p, " float3 v_texcoord: TEXCOORD0; \n");
WRITE(p, " float3 v_texcoord: TEXCOORD0;\n");
else
WRITE(p, " float2 v_texcoord: TEXCOORD0; \n");
WRITE(p, " float2 v_texcoord: TEXCOORD0;\n");
}
WRITE(p, " float4 v_color0 : COLOR0; \n");
WRITE(p, " float4 v_color0 : COLOR0;\n");
if (lmode)
WRITE(p, " float3 v_color1 : COLOR1; \n");
WRITE(p, " float3 v_color1 : COLOR1;\n");
if (enableFog) {
WRITE(p, "float2 v_fogdepth: TEXCOORD1;\n");
WRITE(p, " float2 v_fogdepth: TEXCOORD1;\n");
}
WRITE(p, " }; \n");
WRITE(p, " \n");
WRITE(p, "};\n");
WRITE(p, " VS_OUT main( VS_IN In ) \n");
WRITE(p, " { \n");
WRITE(p, " VS_OUT Out = (VS_OUT)0; \n");
WRITE(p, "VS_OUT main(VS_IN In) {\n");
WRITE(p, " VS_OUT Out = (VS_OUT)0; \n");
if (!useHWTransform) {
// Simple pass-through of vertex data to fragment shader
if (doTexture) {
@ -310,7 +302,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
}
} else {
// Step 1: World Transform / Skinning
if (vertTypeGetWeightMask(vertType) == GE_VTYPE_WEIGHT_NONE) {
if (!vertTypeIsSkinningEnabled(vertType)) {
// No skinning, just standard T&L.
WRITE(p, " float3 worldpos = mul(float4(In.position.xyz, 1.0), u_world).xyz;\n");
if (hasNormal)
@ -598,7 +590,7 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
}
// WRITE(p, "Out.gl_Position.z = (Out.gl_Position.z + Out.gl_Position.w) * 0.5f;");
WRITE(p, " return Out; ");
WRITE(p, " return Out;\n");
WRITE(p, "}\n");
}

View File

@ -58,4 +58,34 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform);
// Collapse to less skinning shaders to reduce shader switching, which is expensive.
int TranslateNumBonesDX9(int bones);
// Fixed vertex shader constant register slots. The generated HLSL binds each
// uniform with ": register(cN)", where N is one of the values below.

// 4x4 matrices. Slots are spaced four registers apart.
#define CONST_VS_PROJ 0
#define CONST_VS_PROJ_THROUGH 4
#define CONST_VS_VIEW 8
#define CONST_VS_WORLD 12
#define CONST_VS_TEXMTX 16
// Bone matrices. The shader generator declares bone i at CONST_VS_BONE0 + i * 4,
// which matches the named slots below.
#define CONST_VS_BONE0 20
#define CONST_VS_BONE1 24
#define CONST_VS_BONE2 28
#define CONST_VS_BONE3 32
#define CONST_VS_BONE4 36
#define CONST_VS_BONE5 40
#define CONST_VS_BONE6 44
#define CONST_VS_BONE7 48
#define CONST_VS_BONE8 52
// Single-register values (one slot each).
#define CONST_VS_FOGCOEF 56
#define CONST_VS_UVSCALEOFFSET 57
#define CONST_VS_AMBIENT 58
#define CONST_VS_MATAMBIENTALPHA 59
#define CONST_VS_MATDIFFUSE 60
#define CONST_VS_MATSPECULAR 61
#define CONST_VS_MATEMISSIVE 62
// Per-light values. The shader generator adds the light index (0..3) to the
// base slot, so each base below is followed by three more slots for lights 1..3.
#define CONST_VS_LIGHTPOS 64
#define CONST_VS_LIGHTDIR 68
#define CONST_VS_LIGHTATT 72
#define CONST_VS_LIGHTANGLE 76
#define CONST_VS_LIGHTSPOTCOEF 80
#define CONST_VS_LIGHTDIFFUSE 84
#define CONST_VS_LIGHTSPECULAR 88
#define CONST_VS_LIGHTAMBIENT 92
};

View File

@ -9,44 +9,38 @@ LPDIRECT3DDEVICE9EX pD3DdeviceEx = NULL;
LPDIRECT3D9 pD3D = NULL;
static const char * vscode =
" float4x4 matWVP : register(c0); "
" "
" struct VS_IN { "
" float4 ObjPos : POSITION; "
" float2 Uv : TEXCOORD0; " // Vertex color
" }; "
" "
" struct VS_OUT { "
" float4 ProjPos : POSITION; "
" float2 Uv : TEXCOORD0; " // Vertex color
" }; "
" "
" VS_OUT main( VS_IN In ) { "
" VS_OUT Out; "
" Out.ProjPos = In.ObjPos; " // Transform vertex into
" Out.Uv = In.Uv; "
" return Out; " // Transfer color
" } ";
"struct VS_IN {\n"
" float4 ObjPos : POSITION;\n"
" float2 Uv : TEXCOORD0;\n"
"};"
"struct VS_OUT {\n"
" float4 ProjPos : POSITION;\n"
" float2 Uv : TEXCOORD0;\n"
"};\n"
"VS_OUT main( VS_IN In ) {\n"
" VS_OUT Out;\n"
" Out.ProjPos = In.ObjPos;\n"
" Out.Uv = In.Uv;\n"
" return Out;\n"
"}\n";
//--------------------------------------------------------------------------------------
// Pixel shader
//--------------------------------------------------------------------------------------
static const char * pscode =
" sampler s: register(s0); "
" struct PS_IN { "
" float2 Uv : TEXCOORD0; "
" }; "
" "
" float4 main( PS_IN In ) : COLOR { "
" float4 c = tex2D(s, In.Uv) ; "
" c.a = 1.0f;"
" return c; "
" } ";
"sampler s: register(s0);\n"
"struct PS_IN {\n"
" float2 Uv : TEXCOORD0;\n"
"};\n"
"float4 main( PS_IN In ) : COLOR {\n"
" float4 c = tex2D(s, In.Uv);\n"
" c.a = 1.0f;\n"
" return c;\n"
"}\n";
IDirect3DVertexDeclaration9* pFramebufferVertexDecl = NULL;
static const D3DVERTEXELEMENT9 VertexElements[] =
{
static const D3DVERTEXELEMENT9 VertexElements[] = {
{ 0, 0, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
{ 0, 12, D3DDECLTYPE_FLOAT2, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
D3DDECL_END()
@ -54,8 +48,7 @@ static const D3DVERTEXELEMENT9 VertexElements[] =
IDirect3DVertexDeclaration9* pSoftVertexDecl = NULL;
static const D3DVERTEXELEMENT9 SoftTransVertexElements[] =
{
static const D3DVERTEXELEMENT9 SoftTransVertexElements[] = {
{ 0, 0, D3DDECLTYPE_FLOAT4, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_POSITION, 0 },
{ 0, 16, D3DDECLTYPE_FLOAT3, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_TEXCOORD, 0 },
{ 0, 28, D3DDECLTYPE_UBYTE4N, D3DDECLMETHOD_DEFAULT, D3DDECLUSAGE_COLOR, 0 },
@ -263,7 +256,6 @@ void DirectxInit(HWND window) {
// TODO
}
#ifdef _XBOX
pD3Ddevice->SetRingBufferParameters( &d3dr );
#endif

View File

@ -852,6 +852,32 @@ inline void ConvertMatrix4x3To4x4(float *m4x4, const float *m4x3) {
m4x4[15] = 1.0f;
}
// Expands a 4x3 matrix (12 floats, four groups of three) into a 4x4 matrix and
// transposes it in the same pass. Each of the first three output rows gathers
// one component from every input group; the last output row is (0, 0, 0, 1).
inline void ConvertMatrix4x3To4x4Transposed(float *m4x4, const float *m4x3) {
	for (int row = 0; row < 3; row++) {
		float *dst = m4x4 + row * 4;
		for (int col = 0; col < 4; col++) {
			dst[col] = m4x3[col * 3 + row];
		}
	}
	// Constant bottom row.
	m4x4[12] = 0.0f;
	m4x4[13] = 0.0f;
	m4x4[14] = 0.0f;
	m4x4[15] = 1.0f;
}
// Writes the transpose of the 4x4 matrix `in` (16 floats) into `out`:
// out[r * 4 + c] = in[c * 4 + r].
inline void Transpose4x4(float out[16], const float in[16]) {
	for (int k = 0; k < 16; k++) {
		const int r = k / 4;
		const int c = k % 4;
		out[k] = in[c * 4 + r];
	}
}
inline float Vec3Dot(const float v1[3], const float v2[3])
{