mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 13:30:02 +00:00
Collapse skinning shaders with #bones < 4 to a single one.
Significant perf win for skinned characters in FF:CC and maybe other games.
This commit is contained in:
parent
9add78722d
commit
e36e976877
@ -115,14 +115,14 @@ LinkedShader::LinkedShader(Shader *vs, Shader *fs, bool useHWTransform)
|
||||
u_world = glGetUniformLocation(program, "u_world");
|
||||
u_texmtx = glGetUniformLocation(program, "u_texmtx");
|
||||
if ((gstate.vertType & GE_VTYPE_WEIGHT_MASK) != 0)
|
||||
numBones = gstate.getNumBoneWeights();
|
||||
numBones = TranslateNumBones(gstate.getNumBoneWeights());
|
||||
else
|
||||
numBones = 0;
|
||||
|
||||
#ifdef USE_BONE_ARRAY
|
||||
u_bone = glGetUniformLocation(program, "u_bone");
|
||||
#else
|
||||
for (int i = 0; i < numBones; i++) {
|
||||
for (int i = 0; i < 8; i++) {
|
||||
char name[10];
|
||||
sprintf(name, "u_bone%i", i);
|
||||
u_bone[i] = glGetUniformLocation(program, name);
|
||||
@ -366,6 +366,7 @@ void LinkedShader::updateUniforms() {
|
||||
|
||||
// TODO: Could even set all bones in one go if they're all dirty.
|
||||
#ifdef USE_BONE_ARRAY
|
||||
|
||||
if (u_bone != -1) {
|
||||
float allBones[8 * 16];
|
||||
|
||||
@ -392,8 +393,7 @@ void LinkedShader::updateUniforms() {
|
||||
#else
|
||||
float bonetemp[16];
|
||||
for (int i = 0; i < numBones; i++) {
|
||||
// I've seen the -1 happen but I don't get it..
|
||||
if ((dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) && u_bone[i] != -1) {
|
||||
if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {
|
||||
ConvertMatrix4x3To4x4(gstate.boneMatrix + 12 * i, bonetemp);
|
||||
glUniformMatrix4fv(u_bone[i], 1, GL_FALSE, bonetemp);
|
||||
}
|
||||
|
@ -21,6 +21,7 @@
|
||||
#include "../ge_constants.h"
|
||||
|
||||
#include "VertexDecoder.h"
|
||||
#include "VertexShaderGenerator.h"
|
||||
|
||||
void PrintDecodedVertex(VertexReader &vtx) {
|
||||
if (vtx.hasNormal())
|
||||
@ -118,16 +119,22 @@ void VertexDecoder::Step_WeightsU8() const
|
||||
{
|
||||
u8 *wt = (u8 *)(decoded_ + decFmt.w0off);
|
||||
const u8 *wdata = (const u8*)(ptr_);
|
||||
for (int j = 0; j < nweights; j++)
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++)
|
||||
wt[j] = wdata[j];
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
// Copies nweights 16-bit weights from the source vertex (ptr_) into the decoded
// vertex at byte offset decFmt.w0off, then writes zeros into the following slots
// until the number of written weights is a multiple of 4.
// NOTE(review): this span comes from a diff view, so two versions of the copy loop
// header appear back to back. `for (int j = 0; ...)` looks like the line being
// replaced and `int j; for (j = 0; ...)` its replacement - confirm against the
// real file before relying on either.
void VertexDecoder::Step_WeightsU16() const
|
||||
{
|
||||
u16 *wt = (u16 *)(decoded_ + decFmt.w0off);
|
||||
const u16 *wdata = (const u16*)(ptr_);
|
||||
for (int j = 0; j < nweights; j++)
|
||||
// j is declared outside the loop so that the padding loop below can continue
// from the index where the copy loop stopped.
int j;
|
||||
for (j = 0; j < nweights; j++)
|
||||
wt[j] = wdata[j];
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
// Float weights should be uncommon, we can live with having to multiply these by 2.0
|
||||
@ -137,9 +144,12 @@ void VertexDecoder::Step_WeightsFloat() const
|
||||
{
|
||||
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||
const float *wdata = (const float*)(ptr_);
|
||||
for (int i = 0; i < nweights; i++) {
|
||||
wt[i] = wdata[i] * 0.5f;
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++) {
|
||||
wt[j] = wdata[j] * 0.5f;
|
||||
}
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0.0f;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU8() const
|
||||
@ -562,6 +572,10 @@ static const StepFunction posstep_through[4] = {
|
||||
};
|
||||
|
||||
|
||||
// Rounds a non-negative x up to the nearest multiple of 4. Values that are
// already a multiple of 4 are returned unchanged (0 -> 0, 1..4 -> 4, 5..8 -> 8).
int RoundUp4(int x) {
	// Adding 3 carries into the next multiple unless x is already aligned;
	// masking with ~3 then clears the two low bits.
	return (x + 3) & ~3;
}
|
||||
|
||||
void VertexDecoder::SetVertexType(u32 fmt) {
|
||||
fmt_ = fmt;
|
||||
throughmode = (fmt & GE_VTYPE_THROUGH) != 0;
|
||||
@ -597,18 +611,22 @@ void VertexDecoder::SetVertexType(u32 fmt) {
|
||||
fmtBase = DEC_U8_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U16_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_FLOAT_1;
|
||||
}
|
||||
|
||||
if (nweights < 5) {
|
||||
int numWeights = TranslateNumBones(nweights);
|
||||
|
||||
if (numWeights <= 4) {
|
||||
decFmt.w0off = decOff;
|
||||
decFmt.w0fmt = fmtBase + nweights - 1;
|
||||
decFmt.w0fmt = fmtBase + numWeights - 1;
|
||||
decOff += DecFmtSize(decFmt.w0fmt);
|
||||
} else {
|
||||
decFmt.w0off = decOff;
|
||||
decFmt.w0fmt = fmtBase + 3;
|
||||
decOff += DecFmtSize(decFmt.w0fmt);
|
||||
decFmt.w1off = decOff;
|
||||
decFmt.w1fmt = fmtBase + nweights - 5;
|
||||
decFmt.w1fmt = fmtBase + numWeights - 5;
|
||||
decOff += DecFmtSize(decFmt.w1fmt);
|
||||
}
|
||||
}
|
||||
|
@ -44,6 +44,13 @@ bool CanUseHardwareTransform(int prim) {
|
||||
return !gstate.isModeThrough() && prim != GE_PRIM_RECTANGLES;
|
||||
}
|
||||
|
||||
// Maps the bone-weight count of a vertex format to the bone count that the
// skinning shader and decoded vertex layout are built for, so that several
// small counts share one shader variant.
//   bones == 0            -> 0 (no skinning)
//   any other value < 4   -> 4
//   4 and above           -> returned unchanged
int TranslateNumBones(int bones) {
	if (!bones) return 0;
	if (bones < 4) return 4;
	// Rounding 5..7 up to 8 is deliberately left disabled:
	// if (bones < 8) return 8;   I get drawing problems in FF:CC with this!
	return bones;
}
|
||||
|
||||
// prim so we can special case for RECTANGLES :(
|
||||
void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
|
||||
const u32 vertType = gstate.vertType;
|
||||
@ -70,7 +77,6 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
|
||||
if (useHWTransform) {
|
||||
id->d[0] |= 1 << 8;
|
||||
id->d[0] |= (hasNormal & 1) << 9;
|
||||
id->d[0] |= (hasBones & 1) << 10;
|
||||
|
||||
// UV generation mode
|
||||
id->d[0] |= gstate.getUVGenMode() << 16;
|
||||
@ -84,12 +90,11 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
|
||||
}
|
||||
|
||||
// Bones
|
||||
id->d[0] |= (gstate.getNumBoneWeights() - 1) << 22;
|
||||
if (hasBones)
|
||||
id->d[0] |= (TranslateNumBones(gstate.getNumBoneWeights()) - 1) << 22;
|
||||
|
||||
// Okay, d[1] coming up. ==============
|
||||
|
||||
id->d[1] |= gstate.isLightingEnabled() << 24;
|
||||
id->d[1] |= ((vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
|
||||
if (gstate.isLightingEnabled() || gstate.getUVGenMode() == 2) {
|
||||
// Light bits
|
||||
for (int i = 0; i < 4; i++) {
|
||||
@ -101,10 +106,13 @@ void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform) {
|
||||
id->d[1] |= (gstate.isLightChanEnabled(i) & 1) << (20 + i);
|
||||
}
|
||||
}
|
||||
id->d[1] |= gstate.isLightingEnabled() << 24;
|
||||
id->d[1] |= ((vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT) << 25;
|
||||
}
|
||||
}
|
||||
|
||||
static const char * const boneWeightAttrDecl[8] = {
|
||||
static const char * const boneWeightAttrDecl[9] = {
|
||||
"#ERROR#",
|
||||
"attribute mediump float a_w1;\n",
|
||||
"attribute mediump vec2 a_w1;\n",
|
||||
"attribute mediump vec3 a_w1;\n",
|
||||
@ -112,7 +120,7 @@ static const char * const boneWeightAttrDecl[8] = {
|
||||
"attribute mediump vec4 a_w1;\nattribute mediump float a_w2;\n",
|
||||
"attribute mediump vec4 a_w1;\nattribute mediump vec2 a_w2;\n",
|
||||
"attribute mediump vec4 a_w1;\nattribute mediump vec3 a_w2;\n",
|
||||
"attribute mediump vec4 a_w1;\nattribute mediump vec4 a_w2;\n",
|
||||
"attribute mediump vec4 a_w1, a_w2;\n",
|
||||
};
|
||||
|
||||
enum DoLightComputation {
|
||||
@ -165,7 +173,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
|
||||
}
|
||||
|
||||
if ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
|
||||
WRITE(p, "%s", boneWeightAttrDecl[gstate.getNumBoneWeights() - 1]);
|
||||
WRITE(p, "%s", boneWeightAttrDecl[TranslateNumBones(gstate.getNumBoneWeights())]);
|
||||
}
|
||||
|
||||
if (useHWTransform)
|
||||
@ -202,7 +210,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
|
||||
if (gstate.getUVGenMode() == 1)
|
||||
WRITE(p, "uniform mediump mat4 u_texmtx;\n");
|
||||
if ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE) {
|
||||
int numBones = 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT);
|
||||
int numBones = TranslateNumBones(gstate.getNumBoneWeights());
|
||||
#ifdef USE_BONE_ARRAY
|
||||
WRITE(p, "uniform mediump mat4 u_bone[%i];\n", numBones);
|
||||
#else
|
||||
@ -298,7 +306,7 @@ void GenerateVertexShader(int prim, char *buffer, bool useHWTransform) {
|
||||
else
|
||||
WRITE(p, " vec3 worldnormal = vec3(0.0, 0.0, 1.0);\n");
|
||||
} else {
|
||||
int numWeights = 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT);
|
||||
int numWeights = TranslateNumBones(gstate.getNumBoneWeights());
|
||||
|
||||
static const float rescale[4] = {0, 2*127.5f/128.f, 2*32767.5f/32768.f, 2.0f};
|
||||
float factor = rescale[(vertType & GE_VTYPE_WEIGHT_MASK) >> GE_VTYPE_WEIGHT_SHIFT];
|
||||
|
@ -52,3 +52,6 @@ bool CanUseHardwareTransform(int prim);
|
||||
|
||||
void ComputeVertexShaderID(VertexShaderID *id, int prim, bool useHWTransform);
|
||||
void GenerateVertexShader(int prim, char *buffer, bool useHWTransform);
|
||||
|
||||
// Collapse to fewer skinning shaders to reduce shader switching, which is expensive.
|
||||
int TranslateNumBones(int bones);
|
||||
|
2
native
2
native
@ -1 +1 @@
|
||||
Subproject commit 31274a78c53fe0609ec4f50fb3daccdb4c89ceac
|
||||
Subproject commit cdfa331775a8edc170f89d3b4af5b0c51ed6195c
|
Loading…
Reference in New Issue
Block a user