mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-24 16:49:50 +00:00
D3D vtxdec: Convert weights and normals to float
This commit is contained in:
parent
0727df6f0a
commit
1407648b59
@ -160,6 +160,30 @@ void VertexDecoder::Step_WeightsU16() const
|
|||||||
wt[j++] = 0;
|
wt[j++] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VertexDecoder::Step_WeightsU8ToFloat() const
|
||||||
|
{
|
||||||
|
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||||
|
const u8 *wdata = (const u8*)(ptr_);
|
||||||
|
int j;
|
||||||
|
for (j = 0; j < nweights; j++) {
|
||||||
|
wt[j] = (float)wdata[j] * (1.0f / 128.0f);
|
||||||
|
}
|
||||||
|
while (j & 3) // Zero additional weights rounding up to 4.
|
||||||
|
wt[j++] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
void VertexDecoder::Step_WeightsU16ToFloat() const
|
||||||
|
{
|
||||||
|
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||||
|
const u16 *wdata = (const u16*)(ptr_);
|
||||||
|
int j;
|
||||||
|
for (j = 0; j < nweights; j++) {
|
||||||
|
wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
|
||||||
|
}
|
||||||
|
while (j & 3) // Zero additional weights rounding up to 4.
|
||||||
|
wt[j++] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Float weights should be uncommon, we can live with having to multiply these by 2.0
|
// Float weights should be uncommon, we can live with having to multiply these by 2.0
|
||||||
// to avoid special checks in the vertex shader generator.
|
// to avoid special checks in the vertex shader generator.
|
||||||
// (PSP uses 0.0-2.0 fixed point numbers for weights)
|
// (PSP uses 0.0-2.0 fixed point numbers for weights)
|
||||||
@ -459,6 +483,15 @@ void VertexDecoder::Step_NormalS8() const
|
|||||||
normal[3] = 0;
|
normal[3] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void VertexDecoder::Step_NormalS8ToFloat() const
|
||||||
|
{
|
||||||
|
float *normal = (float *)(decoded_ + decFmt.nrmoff);
|
||||||
|
const s8 *sv = (const s8*)(ptr_ + nrmoff);
|
||||||
|
normal[0] = sv[0] * (1.0f / 128.0f);
|
||||||
|
normal[1] = sv[1] * (1.0f / 128.0f);
|
||||||
|
normal[2] = sv[2] * (1.0f / 128.0f);
|
||||||
|
}
|
||||||
|
|
||||||
void VertexDecoder::Step_NormalS16() const
|
void VertexDecoder::Step_NormalS16() const
|
||||||
{
|
{
|
||||||
s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);
|
s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);
|
||||||
@ -649,6 +682,13 @@ static const StepFunction wtstep[4] = {
|
|||||||
&VertexDecoder::Step_WeightsFloat,
|
&VertexDecoder::Step_WeightsFloat,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Weight decode steps used when all weights must be expanded to float
// (selected via options.expandAllWeightsToFloat). Indexed by weighttype.
// NOTE(review): the slot order presumably follows the GE_VTYPE_WEIGHT_* encoding
// (none, 8-bit, 16-bit, float) - confirm against the GE constants.
static const StepFunction wtstepToFloat[4] = {
	0,                                      // no step for this weight type
	&VertexDecoder::Step_WeightsU8ToFloat,  // u8  -> float
	&VertexDecoder::Step_WeightsU16ToFloat, // u16 -> float
	&VertexDecoder::Step_WeightsFloat,      // same step as the non-expanding table
};
|
||||||
|
|
||||||
static const StepFunction wtstep_skin[4] = {
|
static const StepFunction wtstep_skin[4] = {
|
||||||
0,
|
0,
|
||||||
&VertexDecoder::Step_WeightsU8Skin,
|
&VertexDecoder::Step_WeightsU8Skin,
|
||||||
@ -746,6 +786,13 @@ static const StepFunction nrmstep[4] = {
|
|||||||
&VertexDecoder::Step_NormalFloat,
|
&VertexDecoder::Step_NormalFloat,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Normal decode steps used when 8-bit normals must be expanded to float
// (selected via options.expand8BitNormalsToFloat). Indexed by nrm.
// Only the 8-bit slot differs from the regular table; 16-bit and float normals
// keep their ordinary steps.
static const StepFunction nrmstep8BitToFloat[4] = {
	0,                                     // no step for this normal type
	&VertexDecoder::Step_NormalS8ToFloat,  // s8 -> float
	&VertexDecoder::Step_NormalS16,        // left as s16
	&VertexDecoder::Step_NormalFloat,      // already float
};
|
||||||
|
|
||||||
static const StepFunction nrmstep_skin[4] = {
|
static const StepFunction nrmstep_skin[4] = {
|
||||||
0,
|
0,
|
||||||
&VertexDecoder::Step_NormalS8Skin,
|
&VertexDecoder::Step_NormalS8Skin,
|
||||||
@ -825,17 +872,21 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
|||||||
|
|
||||||
if (skinInDecode) {
|
if (skinInDecode) {
|
||||||
steps_[numSteps_++] = wtstep_skin[weighttype];
|
steps_[numSteps_++] = wtstep_skin[weighttype];
|
||||||
// No visible output
|
// No visible output, passed in register/external memory to the "pos" step.
|
||||||
} else {
|
} else {
|
||||||
steps_[numSteps_++] = wtstep[weighttype];
|
|
||||||
|
|
||||||
int fmtBase = DEC_FLOAT_1;
|
int fmtBase = DEC_FLOAT_1;
|
||||||
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
if (options.expandAllWeightsToFloat) {
|
||||||
fmtBase = DEC_U8_1;
|
steps_[numSteps_++] = wtstepToFloat[weighttype];
|
||||||
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
|
||||||
fmtBase = DEC_U16_1;
|
|
||||||
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
|
|
||||||
fmtBase = DEC_FLOAT_1;
|
fmtBase = DEC_FLOAT_1;
|
||||||
|
} else {
|
||||||
|
steps_[numSteps_++] = wtstep[weighttype];
|
||||||
|
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||||
|
fmtBase = DEC_U8_1;
|
||||||
|
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||||
|
fmtBase = DEC_U16_1;
|
||||||
|
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||||
|
fmtBase = DEC_FLOAT_1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int numWeights = TranslateNumBones(nweights);
|
int numWeights = TranslateNumBones(nweights);
|
||||||
@ -927,14 +978,26 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
|||||||
// After skinning, we always have three floats.
|
// After skinning, we always have three floats.
|
||||||
decFmt.nrmfmt = DEC_FLOAT_3;
|
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||||
} else {
|
} else {
|
||||||
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
|
||||||
|
|
||||||
if (morphcount == 1) {
|
if (morphcount == 1) {
|
||||||
// The normal formats match the gl formats perfectly, let's use 'em.
|
// The 8-bit and 16-bit normal formats match GL formats nicely, and the 16-bit normal format matches a D3D format so let's use them where possible.
|
||||||
switch (nrm) {
|
switch (nrm) {
|
||||||
case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break;
|
case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT:
|
||||||
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break;
|
if (options.expand8BitNormalsToFloat) {
|
||||||
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break;
|
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||||
|
steps_[numSteps_++] = morphcount == 1 ? nrmstep8BitToFloat[nrm] : nrmstep_morph[nrm];
|
||||||
|
} else {
|
||||||
|
decFmt.nrmfmt = DEC_S8_3;
|
||||||
|
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT:
|
||||||
|
decFmt.nrmfmt = DEC_S16_3;
|
||||||
|
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||||
|
break;
|
||||||
|
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT:
|
||||||
|
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||||
|
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
decFmt.nrmfmt = DEC_FLOAT_3;
|
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||||
|
@ -435,6 +435,8 @@ typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
|
|||||||
|
|
||||||
// Backend-selected switches that control which decoded vertex formats the
// VertexDecoder produces. Kept as a plain struct of bools so a backend can
// zero it with memset and then enable what it needs.
struct VertexDecoderOptions {
	// NOTE(review): presumably forces every texcoord format to be decoded as
	// float; its use is not visible in this section - confirm.
	bool expandAllUVtoFloat;
	// When set, SetVertexType picks the wtstepToFloat steps so 8-bit and
	// 16-bit weights are written out as floats.
	bool expandAllWeightsToFloat;
	// When set, 8-bit normals are decoded to DEC_FLOAT_3 via
	// Step_NormalS8ToFloat instead of being kept as DEC_S8_3.
	bool expand8BitNormalsToFloat;
};
|
||||||
|
|
||||||
class VertexDecoder
|
class VertexDecoder
|
||||||
@ -457,6 +459,8 @@ public:
|
|||||||
|
|
||||||
void Step_WeightsU8() const;
|
void Step_WeightsU8() const;
|
||||||
void Step_WeightsU16() const;
|
void Step_WeightsU16() const;
|
||||||
|
void Step_WeightsU8ToFloat() const;
|
||||||
|
void Step_WeightsU16ToFloat() const;
|
||||||
void Step_WeightsFloat() const;
|
void Step_WeightsFloat() const;
|
||||||
|
|
||||||
void Step_WeightsU8Skin() const;
|
void Step_WeightsU8Skin() const;
|
||||||
@ -492,6 +496,7 @@ public:
|
|||||||
void Step_Color8888Morph() const;
|
void Step_Color8888Morph() const;
|
||||||
|
|
||||||
void Step_NormalS8() const;
|
void Step_NormalS8() const;
|
||||||
|
void Step_NormalS8ToFloat() const;
|
||||||
void Step_NormalS16() const;
|
void Step_NormalS16() const;
|
||||||
void Step_NormalFloat() const;
|
void Step_NormalFloat() const;
|
||||||
|
|
||||||
@ -627,6 +632,7 @@ public:
|
|||||||
void Jit_Color5551();
|
void Jit_Color5551();
|
||||||
|
|
||||||
void Jit_NormalS8();
|
void Jit_NormalS8();
|
||||||
|
void Jit_NormalS8ToFloat();
|
||||||
void Jit_NormalS16();
|
void Jit_NormalS16();
|
||||||
void Jit_NormalFloat();
|
void Jit_NormalFloat();
|
||||||
|
|
||||||
@ -635,6 +641,7 @@ public:
|
|||||||
void Jit_NormalFloatSkin();
|
void Jit_NormalFloatSkin();
|
||||||
|
|
||||||
void Jit_PosS8();
|
void Jit_PosS8();
|
||||||
|
void Jit_PosS8ToFloat();
|
||||||
void Jit_PosS16();
|
void Jit_PosS16();
|
||||||
void Jit_PosFloat();
|
void Jit_PosFloat();
|
||||||
void Jit_PosS8Through();
|
void Jit_PosS8Through();
|
||||||
|
@ -109,6 +109,7 @@ static const JitLookup jitLookup[] = {
|
|||||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||||
|
|
||||||
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
||||||
|
{&VertexDecoder::Step_NormalS8ToFloat, &VertexDecoderJitCache::Jit_NormalS8ToFloat},
|
||||||
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
||||||
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
|
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
|
||||||
|
|
||||||
@ -956,6 +957,11 @@ void VertexDecoderJitCache::Jit_NormalS8() {
|
|||||||
MOV(32, MDisp(dstReg, dec_->decFmt.nrmoff), R(tempReg1));
|
MOV(32, MDisp(dstReg, dec_->decFmt.nrmoff), R(tempReg1));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Emits the JIT counterpart of Step_NormalS8ToFloat: converts the s8 normal
// at the source normal offset and stores it at the decoded normal offset.
void VertexDecoderJitCache::Jit_NormalS8ToFloat() {
	const int srcOffset = dec_->nrmoff;
	const int dstOffset = dec_->decFmt.nrmoff;

	// NOTE(review): Jit_AnyS8ToFloat presumably leaves the converted floats in
	// XMM3, since that is the register stored below - confirm in the helper.
	Jit_AnyS8ToFloat(srcOffset);
	// NOTE(review): MOVUPS is a full 128-bit store, so a fourth lane is
	// presumably written after the three normal components - confirm the
	// decoded layout tolerates this.
	MOVUPS(MDisp(dstReg, dstOffset), XMM3);
}
|
||||||
|
|
||||||
// Copy 6 bytes and then 2 zeroes.
|
// Copy 6 bytes and then 2 zeroes.
|
||||||
void VertexDecoderJitCache::Jit_NormalS16() {
|
void VertexDecoderJitCache::Jit_NormalS16() {
|
||||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->nrmoff));
|
MOV(32, R(tempReg1), MDisp(srcReg, dec_->nrmoff));
|
||||||
|
@ -147,6 +147,8 @@ TransformDrawEngineDX9::TransformDrawEngineDX9()
|
|||||||
|
|
||||||
memset(&decOptions_, 0, sizeof(decOptions_));
|
memset(&decOptions_, 0, sizeof(decOptions_));
|
||||||
decOptions_.expandAllUVtoFloat = true;
|
decOptions_.expandAllUVtoFloat = true;
|
||||||
|
decOptions_.expandAllWeightsToFloat = true;
|
||||||
|
decOptions_.expand8BitNormalsToFloat = true;
|
||||||
|
|
||||||
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
|
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
|
||||||
// Allocate nicely aligned memory. Maybe graphics drivers will
|
// Allocate nicely aligned memory. Maybe graphics drivers will
|
||||||
|
@ -312,9 +312,6 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
|
|||||||
} else {
|
} else {
|
||||||
int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
|
int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
|
||||||
|
|
||||||
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
|
|
||||||
const char *factor = rescale[vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT];
|
|
||||||
|
|
||||||
static const char * const boneWeightAttr[8] = {
|
static const char * const boneWeightAttr[8] = {
|
||||||
"a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w",
|
"a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w",
|
||||||
"a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w",
|
"a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w",
|
||||||
@ -377,11 +374,11 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
|
|||||||
WRITE(p, ";\n");
|
WRITE(p, ";\n");
|
||||||
|
|
||||||
// Trying to simplify this results in bugs in LBP...
|
// Trying to simplify this results in bugs in LBP...
|
||||||
WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix).xyz %s;\n", factor);
|
WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix).xyz;\n");
|
||||||
WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world).xyz;\n");
|
WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world).xyz;\n");
|
||||||
|
|
||||||
if (hasNormal) {
|
if (hasNormal) {
|
||||||
WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor);
|
WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix).xyz;\n", flipNormal ? "-" : "");
|
||||||
WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world).xyz);\n");
|
WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world).xyz);\n");
|
||||||
} else {
|
} else {
|
||||||
WRITE(p, " float3 worldnormal = mul( mul( float4(0.0, 0.0, 1.0, 0.0), skinMatrix), u_world).xyz;\n");
|
WRITE(p, " float3 worldnormal = mul( mul( float4(0.0, 0.0, 1.0, 0.0), skinMatrix), u_world).xyz;\n");
|
||||||
|
Loading…
Reference in New Issue
Block a user