Collapse skinning shaders with #bones < 4 to a single one.

Significant perf win for skinned characters in FF:CC and maybe other games.
This commit is contained in:
Henrik Rydgard 2013-07-27 20:09:22 +02:00
parent 9add78722d
commit e36e976877
5 changed files with 50 additions and 21 deletions

View File

@ -115,14 +115,14 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, bool useHWTransform)
u_world = glGetUniformLocation(program, "u_world");
u_texmtx = glGetUniformLocation(program, "u_texmtx");
if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) != 0)
numBones = gstate.getNumBoneWeights();
numBones = TranslateNumBones(gstate.getNumBoneWeights());
else
numBones = 0;
#ifdef USE_BONE_ARRAY
u_bone = glGetUniformLocation(program, "u_bone");
#else
for (int i = 0; i < numBones; i++) {
for (int i = 0; i < 8; i++) {
char name[10];
sprintf(name, "u_bone%i", i);
u_bone[i] = glGetUniformLocation(program, name);
@ -366,6 +366,7 @@ void LinkedShader::updateUniforms() {
// TODO: Could even set all bones in one go if they're all dirty.
#ifdef USE_BONE_ARRAY
if (u_bone != -1) {
float allBones[8 * 16];
@ -392,8 +393,7 @@ void LinkedShader::updateUniforms() {
#else
float bonetemp[16];
for (int i = 0; i < numBones; i++) {
// I've seen the -1 happen but I don't get it..
if ((dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) && u_bone[i] != -1) {
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, bonetemp);
glUniformMatrix4fv(u_bone[i], 1, GL_FALSE, bonetemp);
}

View File

@ -21,6 +21,7 @@
#include "../ge_constants.h"
#include "VertexDecoder.h"
#include "VertexShaderGenerator.h"
void PrintDecodedVertex(VertexReader &vtx) {
if (vtx.hasNormal())
@ -118,16 +119,22 @@ void VertexDecoder::Step_WeightsU8() const
{
// Decode step for 8-bit weights: copies them from the source vertex into the
// decoded vertex without rescaling, then zero-pads the weight count.
// Destination slots start at offset decFmt.w0off inside the decoded vertex.
u8 *wt = (u8 *)(decoded_ + decFmt.w0off);
// Source weights are read from the current input pointer.
const u8 *wdata = (const u8*)(ptr_);
for (int j = 0; j < nweights; j++)
int j;
// Copy the nweights source weights through unchanged.
for (j = 0; j < nweights; j++)
wt[j] = wdata[j];
// j keeps its value after the copy loop, so padding continues from the first
// unused slot and stops once j reaches a multiple of 4.
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
// Decode step for 16-bit weights: copies them from the source vertex into the
// decoded vertex without rescaling, then zero-pads the weight count.
void VertexDecoder::Step_WeightsU16() const
{
// Destination slots start at offset decFmt.w0off inside the decoded vertex.
u16 *wt = (u16 *)(decoded_ + decFmt.w0off);
// Source weights are read from the current input pointer.
const u16 *wdata = (const u16*)(ptr_);
for (int j = 0; j < nweights; j++)
int j;
// Copy the nweights source weights through unchanged.
for (j = 0; j < nweights; j++)
wt[j] = wdata[j];
// j keeps its value after the copy loop, so padding continues from the first
// unused slot and stops once j reaches a multiple of 4.
// NOTE(review): for nweights of 5..7 this writes up to slot 7 - confirm the
// decoded format reserves room for those extra slots.
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
// Float weights should be uncommon, we can live with having to multiply these by 2.0
@ -137,9 +144,12 @@ void VertexDecoder::Step_WeightsFloat() const
{
// Decode step for float weights: stores each weight at half its source value,
// then zero-pads the weight count.
// Destination slots start at offset decFmt.w0off inside the decoded vertex.
float *wt = (float *)(decoded_ + decFmt.w0off);
// Source weights are read from the current input pointer.
const float *wdata = (const float*)(ptr_);
for (int i = 0; i < nweights; i++) {
wt[i] = wdata[i] * 0.5f;
int j;
// Halve every source weight while copying it across.
for (j = 0; j < nweights; j++) {
wt[j] = wdata[j] * 0.5f;
}
// j keeps its value after the copy loop, so padding continues from the first
// unused slot and stops once j reaches a multiple of 4.
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0.0f;
}
void VertexDecoder::Step_TcU8() const
@ -562,6 +572,10 @@ static const StepFunction posstep_through[4] = {
};
// Rounds x up to the next multiple of 4; a value that is already a multiple
// of 4 is returned unchanged.
int RoundUp4(int x) {
	const int bumped = x + 3;
	// Clearing the two lowest bits lands on a multiple of 4.
	return bumped & ~3;
}
void VertexDecoder::SetVertexType(u32 fmt) {
fmt_ = fmt;
throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
@ -597,18 +611,22 @@ void VertexDecoder::SetVertexType(u32 fmt) {
fmtBase = DEC_U8_1;
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U16_1;
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_FLOAT_1;
}
if (nweights < 5) {
int numWeights = TranslateNumBones(nweights);
if (numWeights <= 4) {
decFmt.w0off = decOff;
decFmt.w0fmt = fmtBase + nweights - 1;
decFmt.w0fmt = fmtBase + numWeights - 1;
decOff += DecFmtSize(decFmt.w0fmt);
} else {
decFmt.w0off = decOff;
decFmt.w0fmt = fmtBase + 3;
decOff += DecFmtSize(decFmt.w0fmt);
decFmt.w1off = decOff;
decFmt.w1fmt = fmtBase + nweights - 5;
decFmt.w1fmt = fmtBase + numWeights - 5;
decOff += DecFmtSize(decFmt.w1fmt);
}
}

View File

@ -44,6 +44,13 @@ bool CanUseHardwareTransform(int prim) {
return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES;
}
// Maps a bone-weight count onto the count the skinning shaders are built for,
// so that several small counts share one shader.
//   0                      -> 0
//   any other value below 4 -> 4
//   4 and above            -> returned unchanged
int TranslateNumBones(int bones) {
	if (bones == 0)
		return 0;
	// Collapsing counts below 8 up to 8 as well was tried, but it caused
	// drawing problems in FF:CC, so only counts below 4 are collapsed.
	return bones < 4 ? 4 : bones;
}
// prim so we can special case for RECTANGLES :(
void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
const u32 vertType = gstate.vertType;
@ -70,7 +77,6 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
if (useHWTransform) {
id->d[0] |= 1 << 8;
id->d[0] |= (hasNormal & 1) << 9;
id->d[0] |= (hasBones & 1) << 10;
// UV generation mode
id->d[0] |= gstate.getUVGenMode() << 16;
@ -84,12 +90,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
}
// Bones
id->d[0] |= (gstate.getNumBoneWeights() - 1) << 22;
if (hasBones)
id->d[0] |= (TranslateNumBones(gstate.getNumBoneWeights()) - 1) << 22;
// Okay, d[1] coming up. ==============
id->d[1] |= gstate.isLightingEnabled() << 24;
id->d[1] |= ((vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
if (gstate.isLightingEnabled() || gstate.getUVGenMode() == 2) {
// Light bits
for (int i = 0; i < 4; i++) {
@ -101,10 +106,13 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
id->d[1] |= (gstate.isLightChanEnabled(i) & 1) << (20 + i);
}
}
id->d[1] |= gstate.isLightingEnabled() << 24;
id->d[1] |= ((vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
}
}
static const char * const boneWeightAttrDecl[8] = {
static const char * const boneWeightAttrDecl[9] = {
"#ERROR#",
"attribute mediump float a_w1;\n",
"attribute mediump vec2 a_w1;\n",
"attribute mediump vec3 a_w1;\n",
@ -112,7 +120,7 @@ static const char * const boneWeightAttrDecl[8] = {
"attribute mediump vec4 a_w1;\nattribute mediump float a_w2;\n",
"attribute mediump vec4 a_w1;\nattribute mediump vec2 a_w2;\n",
"attribute mediump vec4 a_w1;\nattribute mediump vec3 a_w2;\n",
"attribute mediump vec4 a_w1;\nattribute mediump vec4 a_w2;\n",
"attribute mediump vec4 a_w1, a_w2;\n",
};
enum DoLightComputation {
@ -165,7 +173,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
}
if ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
WRITE(p, "%s", boneWeightAttrDecl[gstate.getNumBoneWeights() - 1]);
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(gstate.getNumBoneWeights())]);
}
if (useHWTransform)
@ -202,7 +210,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
if (gstate.getUVGenMode() == 1)
WRITE(p, "uniform mediump mat4 u_texmtx;\n");
if ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
int numBones = 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT);
int numBones = TranslateNumBones(gstate.getNumBoneWeights());
#ifdef USE_BONE_ARRAY
WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBones);
#else
@ -298,7 +306,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
else
WRITE(p, " vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
} else {
int numWeights = 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT);
int numWeights = TranslateNumBones(gstate.getNumBoneWeights());
static const float rescale[4] = {0, 2*127.5f/128.f, 2*32767.5f/32768.f, 2.0f};
float factor = rescale[(vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT];

View File

@ -52,3 +52,6 @@ bool CanUseHardwareTransform(int prim);
void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform);
void GenerateVertexShader(int prim, char *buffer, bool useHWTransform);
// Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
int TranslateNumBones(int bones);

2
native

@ -1 +1 @@
Subproject commit 31274a78c53fe0609ec4f50fb3daccdb4c89ceac
Subproject commit cdfa331775a8edc170f89d3b4af5b0c51ed6195c