D3D vtxdec: Convert weights and normals to float

This commit is contained in:
Henrik Rydgard 2014-09-12 01:38:45 +02:00
parent 0727df6f0a
commit 1407648b59
5 changed files with 94 additions and 19 deletions

View File

@ -160,6 +160,30 @@ void VertexDecoder::Step_WeightsU16() const
wt[j++] = 0; wt[j++] = 0;
} }
void VertexDecoder::Step_WeightsU8ToFloat() const
{
float *wt = (float *)(decoded_ + decFmt.w0off);
const u8 *wdata = (const u8*)(ptr_);
int j;
for (j = 0; j < nweights; j++) {
wt[j] = (float)wdata[j] * (1.0f / 128.0f);
}
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
void VertexDecoder::Step_WeightsU16ToFloat() const
{
float *wt = (float *)(decoded_ + decFmt.w0off);
const u16 *wdata = (const u16*)(ptr_);
int j;
for (j = 0; j < nweights; j++) {
wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
}
while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0;
}
// Float weights should be uncommon, we can live with having to multiply these by 2.0 // Float weights should be uncommon, we can live with having to multiply these by 2.0
// to avoid special checks in the vertex shader generator. // to avoid special checks in the vertex shader generator.
// (PSP uses 0.0-2.0 fixed point numbers for weights) // (PSP uses 0.0-2.0 fixed point numbers for weights)
@ -459,6 +483,15 @@ void VertexDecoder::Step_NormalS8() const
normal[3] = 0; normal[3] = 0;
} }
void VertexDecoder::Step_NormalS8ToFloat() const
{
float *normal = (float *)(decoded_ + decFmt.nrmoff);
const s8 *sv = (const s8*)(ptr_ + nrmoff);
normal[0] = sv[0] * (1.0f / 128.0f);
normal[1] = sv[1] * (1.0f / 128.0f);
normal[2] = sv[2] * (1.0f / 128.0f);
}
void VertexDecoder::Step_NormalS16() const void VertexDecoder::Step_NormalS16() const
{ {
s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff); s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);
@ -649,6 +682,13 @@ static const StepFunction wtstep[4] = {
&VertexDecoder::Step_WeightsFloat, &VertexDecoder::Step_WeightsFloat,
}; };
// Weight decode steps that always produce float output. Indexed by the
// vertex format's weight type; SetVertexType picks from this table instead of
// wtstep when options.expandAllWeightsToFloat is set.
static const StepFunction wtstepToFloat[4] = {
// Index 0 has no step (presumably "no weights" - confirm against callers).
0,
// 8-bit and 16-bit weights are expanded to float.
&VertexDecoder::Step_WeightsU8ToFloat,
&VertexDecoder::Step_WeightsU16ToFloat,
// Float weights reuse the regular float step.
&VertexDecoder::Step_WeightsFloat,
};
static const StepFunction wtstep_skin[4] = { static const StepFunction wtstep_skin[4] = {
0, 0,
&VertexDecoder::Step_WeightsU8Skin, &VertexDecoder::Step_WeightsU8Skin,
@ -746,6 +786,13 @@ static const StepFunction nrmstep[4] = {
&VertexDecoder::Step_NormalFloat, &VertexDecoder::Step_NormalFloat,
}; };
// Normal decode steps for backends that cannot consume 8-bit normals directly.
// Indexed by the vertex format's normal type; SetVertexType uses this table
// for non-morphed 8-bit normals when options.expand8BitNormalsToFloat is set.
static const StepFunction nrmstep8BitToFloat[4] = {
// Index 0 has no step (presumably "no normal" - confirm against callers).
0,
// Only the 8-bit entry differs from the regular table: it expands to float.
&VertexDecoder::Step_NormalS8ToFloat,
// 16-bit and float normals keep their regular steps.
&VertexDecoder::Step_NormalS16,
&VertexDecoder::Step_NormalFloat,
};
static const StepFunction nrmstep_skin[4] = { static const StepFunction nrmstep_skin[4] = {
0, 0,
&VertexDecoder::Step_NormalS8Skin, &VertexDecoder::Step_NormalS8Skin,
@ -825,17 +872,21 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
if (skinInDecode) { if (skinInDecode) {
steps_[numSteps_++] = wtstep_skin[weighttype]; steps_[numSteps_++] = wtstep_skin[weighttype];
// No visible output // No visible output, passed in register/external memory to the "pos" step.
} else { } else {
steps_[numSteps_++] = wtstep[weighttype];
int fmtBase = DEC_FLOAT_1; int fmtBase = DEC_FLOAT_1;
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) { if (options.expandAllWeightsToFloat) {
fmtBase = DEC_U8_1; steps_[numSteps_++] = wtstepToFloat[weighttype];
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U16_1;
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_FLOAT_1; fmtBase = DEC_FLOAT_1;
} else {
steps_[numSteps_++] = wtstep[weighttype];
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U8_1;
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_U16_1;
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
fmtBase = DEC_FLOAT_1;
}
} }
int numWeights = TranslateNumBones(nweights); int numWeights = TranslateNumBones(nweights);
@ -927,14 +978,26 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
// After skinning, we always have three floats. // After skinning, we always have three floats.
decFmt.nrmfmt = DEC_FLOAT_3; decFmt.nrmfmt = DEC_FLOAT_3;
} else { } else {
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
if (morphcount == 1) { if (morphcount == 1) {
// The normal formats match the gl formats perfectly, let's use 'em. // The 8-bit and 16-bit normal formats match GL formats nicely, and the 16-bit normal format matches a D3D format so let's use them where possible.
switch (nrm) { switch (nrm) {
case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break; case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT:
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break; if (options.expand8BitNormalsToFloat) {
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break; decFmt.nrmfmt = DEC_FLOAT_3;
steps_[numSteps_++] = morphcount == 1 ? nrmstep8BitToFloat[nrm] : nrmstep_morph[nrm];
} else {
decFmt.nrmfmt = DEC_S8_3;
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
}
break;
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT:
decFmt.nrmfmt = DEC_S16_3;
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
break;
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT:
decFmt.nrmfmt = DEC_FLOAT_3;
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
break;
} }
} else { } else {
decFmt.nrmfmt = DEC_FLOAT_3; decFmt.nrmfmt = DEC_FLOAT_3;

View File

@ -435,6 +435,8 @@ typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
struct VertexDecoderOptions { struct VertexDecoderOptions {
bool expandAllUVtoFloat; bool expandAllUVtoFloat;
bool expandAllWeightsToFloat;
bool expand8BitNormalsToFloat;
}; };
class VertexDecoder class VertexDecoder
@ -457,6 +459,8 @@ public:
void Step_WeightsU8() const; void Step_WeightsU8() const;
void Step_WeightsU16() const; void Step_WeightsU16() const;
void Step_WeightsU8ToFloat() const;
void Step_WeightsU16ToFloat() const;
void Step_WeightsFloat() const; void Step_WeightsFloat() const;
void Step_WeightsU8Skin() const; void Step_WeightsU8Skin() const;
@ -492,6 +496,7 @@ public:
void Step_Color8888Morph() const; void Step_Color8888Morph() const;
void Step_NormalS8() const; void Step_NormalS8() const;
void Step_NormalS8ToFloat() const;
void Step_NormalS16() const; void Step_NormalS16() const;
void Step_NormalFloat() const; void Step_NormalFloat() const;
@ -627,6 +632,7 @@ public:
void Jit_Color5551(); void Jit_Color5551();
void Jit_NormalS8(); void Jit_NormalS8();
void Jit_NormalS8ToFloat();
void Jit_NormalS16(); void Jit_NormalS16();
void Jit_NormalFloat(); void Jit_NormalFloat();
@ -635,6 +641,7 @@ public:
void Jit_NormalFloatSkin(); void Jit_NormalFloatSkin();
void Jit_PosS8(); void Jit_PosS8();
void Jit_PosS8ToFloat();
void Jit_PosS16(); void Jit_PosS16();
void Jit_PosFloat(); void Jit_PosFloat();
void Jit_PosS8Through(); void Jit_PosS8Through();

View File

@ -109,6 +109,7 @@ static const JitLookup jitLookup[] = {
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble}, {&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8}, {&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
{&VertexDecoder::Step_NormalS8ToFloat, &VertexDecoderJitCache::Jit_NormalS8ToFloat},
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16}, {&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat}, {&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
@ -956,6 +957,11 @@ void VertexDecoderJitCache::Jit_NormalS8() {
MOV(32, MDisp(dstReg, dec_->decFmt.nrmoff), R(tempReg1)); MOV(32, MDisp(dstReg, dec_->decFmt.nrmoff), R(tempReg1));
} }
// JIT counterpart of VertexDecoder::Step_NormalS8ToFloat (paired with it in
// jitLookup): emits code that converts the signed 8-bit normal at source
// offset dec_->nrmoff to floats and stores it at dec_->decFmt.nrmoff.
void VertexDecoderJitCache::Jit_NormalS8ToFloat() {
// Presumably leaves the converted components in XMM3 - confirm in
// Jit_AnyS8ToFloat, which is not visible here.
Jit_AnyS8ToFloat(dec_->nrmoff);
// NOTE(review): MOVUPS stores a full 16 bytes, i.e. one float beyond the
// three normal components; presumably whatever follows the normal in the
// decoded vertex is written afterwards - confirm.
MOVUPS(MDisp(dstReg, dec_->decFmt.nrmoff), XMM3);
}
// Copy 6 bytes and then 2 zeroes. // Copy 6 bytes and then 2 zeroes.
void VertexDecoderJitCache::Jit_NormalS16() { void VertexDecoderJitCache::Jit_NormalS16() {
MOV(32, R(tempReg1), MDisp(srcReg, dec_->nrmoff)); MOV(32, R(tempReg1), MDisp(srcReg, dec_->nrmoff));

View File

@ -147,6 +147,8 @@ TransformDrawEngineDX9::TransformDrawEngineDX9()
memset(&decOptions_, 0, sizeof(decOptions_)); memset(&decOptions_, 0, sizeof(decOptions_));
decOptions_.expandAllUVtoFloat = true; decOptions_.expandAllUVtoFloat = true;
decOptions_.expandAllWeightsToFloat = true;
decOptions_.expand8BitNormalsToFloat = true;
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL; decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
// Allocate nicely aligned memory. Maybe graphics drivers will // Allocate nicely aligned memory. Maybe graphics drivers will

View File

@ -312,9 +312,6 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
} else { } else {
int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType)); int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
const char *factor = rescale[vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT];
static const char * const boneWeightAttr[8] = { static const char * const boneWeightAttr[8] = {
"a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w", "a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w",
"a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w", "a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w",
@ -377,11 +374,11 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
WRITE(p, ";\n"); WRITE(p, ";\n");
// Trying to simplify this results in bugs in LBP... // Trying to simplify this results in bugs in LBP...
WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix).xyz %s;\n", factor); WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix).xyz;\n");
WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world).xyz;\n"); WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world).xyz;\n");
if (hasNormal) { if (hasNormal) {
WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor); WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix).xyz;\n", flipNormal ? "-" : "");
WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world).xyz);\n"); WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world).xyz);\n");
} else { } else {
WRITE(p, " float3 worldnormal = mul( mul( float4(0.0, 0.0, 1.0, 0.0), skinMatrix), u_world).xyz;\n"); WRITE(p, " float3 worldnormal = mul( mul( float4(0.0, 0.0, 1.0, 0.0), skinMatrix), u_world).xyz;\n");