mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-24 16:49:50 +00:00
D3D vtxdec: Convert weights and normals to float
This commit is contained in:
parent
0727df6f0a
commit
1407648b59
@ -160,6 +160,30 @@ void VertexDecoder::Step_WeightsU16() const
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU8ToFloat() const
|
||||
{
|
||||
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||
const u8 *wdata = (const u8*)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++) {
|
||||
wt[j] = (float)wdata[j] * (1.0f / 128.0f);
|
||||
}
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_WeightsU16ToFloat() const
|
||||
{
|
||||
float *wt = (float *)(decoded_ + decFmt.w0off);
|
||||
const u16 *wdata = (const u16*)(ptr_);
|
||||
int j;
|
||||
for (j = 0; j < nweights; j++) {
|
||||
wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
|
||||
}
|
||||
while (j & 3) // Zero additional weights rounding up to 4.
|
||||
wt[j++] = 0;
|
||||
}
|
||||
|
||||
// Float weights should be uncommon, we can live with having to multiply these by 2.0
|
||||
// to avoid special checks in the vertex shader generator.
|
||||
// (PSP uses 0.0-2.0 fixed point numbers for weights)
|
||||
@ -459,6 +483,15 @@ void VertexDecoder::Step_NormalS8() const
|
||||
normal[3] = 0;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_NormalS8ToFloat() const
|
||||
{
|
||||
float *normal = (float *)(decoded_ + decFmt.nrmoff);
|
||||
const s8 *sv = (const s8*)(ptr_ + nrmoff);
|
||||
normal[0] = sv[0] * (1.0f / 128.0f);
|
||||
normal[1] = sv[1] * (1.0f / 128.0f);
|
||||
normal[2] = sv[2] * (1.0f / 128.0f);
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_NormalS16() const
|
||||
{
|
||||
s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff);
|
||||
@ -649,6 +682,13 @@ static const StepFunction wtstep[4] = {
|
||||
&VertexDecoder::Step_WeightsFloat,
|
||||
};
|
||||
|
||||
// Weight decode steps that always produce float output. SetVertexType indexes
// this table by weighttype when options.expandAllWeightsToFloat is set.
static const StepFunction wtstepToFloat[4] = {
|
||||
0,  // index 0: no step
|
||||
&VertexDecoder::Step_WeightsU8ToFloat,  // 8-bit weights scaled by 1/128
|
||||
&VertexDecoder::Step_WeightsU16ToFloat,  // 16-bit weights scaled by 1/32768
|
||||
&VertexDecoder::Step_WeightsFloat,  // same step as in the wtstep table
|
||||
};
|
||||
|
||||
static const StepFunction wtstep_skin[4] = {
|
||||
0,
|
||||
&VertexDecoder::Step_WeightsU8Skin,
|
||||
@ -746,6 +786,13 @@ static const StepFunction nrmstep[4] = {
|
||||
&VertexDecoder::Step_NormalFloat,
|
||||
};
|
||||
|
||||
// Normal decode steps where only the 8-bit entry is expanded to float.
// SetVertexType indexes this table by nrm when options.expand8BitNormalsToFloat
// is set and morphcount == 1.
static const StepFunction nrmstep8BitToFloat[4] = {
|
||||
0,  // index 0: no step
|
||||
&VertexDecoder::Step_NormalS8ToFloat,  // s8 components scaled by 1/128 into floats
|
||||
&VertexDecoder::Step_NormalS16,  // 16-bit normals are not expanded by this table
|
||||
&VertexDecoder::Step_NormalFloat,
|
||||
};
|
||||
|
||||
static const StepFunction nrmstep_skin[4] = {
|
||||
0,
|
||||
&VertexDecoder::Step_NormalS8Skin,
|
||||
@ -825,17 +872,21 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
||||
|
||||
if (skinInDecode) {
|
||||
steps_[numSteps_++] = wtstep_skin[weighttype];
|
||||
// No visible output
|
||||
// No visible output, passed in register/external memory to the "pos" step.
|
||||
} else {
|
||||
steps_[numSteps_++] = wtstep[weighttype];
|
||||
|
||||
int fmtBase = DEC_FLOAT_1;
|
||||
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U8_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U16_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
if (options.expandAllWeightsToFloat) {
|
||||
steps_[numSteps_++] = wtstepToFloat[weighttype];
|
||||
fmtBase = DEC_FLOAT_1;
|
||||
} else {
|
||||
steps_[numSteps_++] = wtstep[weighttype];
|
||||
if (weighttype == GE_VTYPE_WEIGHT_8BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U8_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_16BIT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_U16_1;
|
||||
} else if (weighttype == GE_VTYPE_WEIGHT_FLOAT >> GE_VTYPE_WEIGHT_SHIFT) {
|
||||
fmtBase = DEC_FLOAT_1;
|
||||
}
|
||||
}
|
||||
|
||||
int numWeights = TranslateNumBones(nweights);
|
||||
@ -927,14 +978,26 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
||||
// After skinning, we always have three floats.
|
||||
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||
} else {
|
||||
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||
|
||||
if (morphcount == 1) {
|
||||
// The normal formats match the gl formats perfectly, let's use 'em.
|
||||
// The 8-bit and 16-bit normal formats match GL formats nicely, and the 16-bit normal format matches a D3D format so let's use them where possible.
|
||||
switch (nrm) {
|
||||
case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S8_3; break;
|
||||
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_S16_3; break;
|
||||
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT: decFmt.nrmfmt = DEC_FLOAT_3; break;
|
||||
case GE_VTYPE_NRM_8BIT >> GE_VTYPE_NRM_SHIFT:
|
||||
if (options.expand8BitNormalsToFloat) {
|
||||
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||
steps_[numSteps_++] = morphcount == 1 ? nrmstep8BitToFloat[nrm] : nrmstep_morph[nrm];
|
||||
} else {
|
||||
decFmt.nrmfmt = DEC_S8_3;
|
||||
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||
}
|
||||
break;
|
||||
case GE_VTYPE_NRM_16BIT >> GE_VTYPE_NRM_SHIFT:
|
||||
decFmt.nrmfmt = DEC_S16_3;
|
||||
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||
break;
|
||||
case GE_VTYPE_NRM_FLOAT >> GE_VTYPE_NRM_SHIFT:
|
||||
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||
steps_[numSteps_++] = morphcount == 1 ? nrmstep[nrm] : nrmstep_morph[nrm];
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
decFmt.nrmfmt = DEC_FLOAT_3;
|
||||
|
@ -435,6 +435,8 @@ typedef void(*JittedVertexDecoder)(const u8 *src, u8 *dst, int count);
|
||||
|
||||
// Per-backend switches that control which output formats the vertex decoder
// emits. The DX9 draw engine memsets this struct to zero and then enables all
// three flags.
struct VertexDecoderOptions {
|
||||
// Presumably forces every texture-coordinate format to be decoded as floats;
// its use is not visible in this diff - confirm in SetVertexType.
bool expandAllUVtoFloat;
|
||||
// When set, SetVertexType picks steps from wtstepToFloat and uses DEC_FLOAT_1
// as the weight format base.
bool expandAllWeightsToFloat;
|
||||
// When set, 8-bit normals are decoded as DEC_FLOAT_3 instead of DEC_S8_3.
bool expand8BitNormalsToFloat;
|
||||
};
|
||||
|
||||
class VertexDecoder
|
||||
@ -457,6 +459,8 @@ public:
|
||||
|
||||
void Step_WeightsU8() const;
|
||||
void Step_WeightsU16() const;
|
||||
void Step_WeightsU8ToFloat() const;
|
||||
void Step_WeightsU16ToFloat() const;
|
||||
void Step_WeightsFloat() const;
|
||||
|
||||
void Step_WeightsU8Skin() const;
|
||||
@ -492,6 +496,7 @@ public:
|
||||
void Step_Color8888Morph() const;
|
||||
|
||||
void Step_NormalS8() const;
|
||||
void Step_NormalS8ToFloat() const;
|
||||
void Step_NormalS16() const;
|
||||
void Step_NormalFloat() const;
|
||||
|
||||
@ -627,6 +632,7 @@ public:
|
||||
void Jit_Color5551();
|
||||
|
||||
void Jit_NormalS8();
|
||||
void Jit_NormalS8ToFloat();
|
||||
void Jit_NormalS16();
|
||||
void Jit_NormalFloat();
|
||||
|
||||
@ -635,6 +641,7 @@ public:
|
||||
void Jit_NormalFloatSkin();
|
||||
|
||||
void Jit_PosS8();
|
||||
void Jit_PosS8ToFloat();
|
||||
void Jit_PosS16();
|
||||
void Jit_PosFloat();
|
||||
void Jit_PosS8Through();
|
||||
|
@ -109,6 +109,7 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_TcU16ThroughDouble, &VertexDecoderJitCache::Jit_TcU16ThroughDouble},
|
||||
|
||||
{&VertexDecoder::Step_NormalS8, &VertexDecoderJitCache::Jit_NormalS8},
|
||||
{&VertexDecoder::Step_NormalS8ToFloat, &VertexDecoderJitCache::Jit_NormalS8ToFloat},
|
||||
{&VertexDecoder::Step_NormalS16, &VertexDecoderJitCache::Jit_NormalS16},
|
||||
{&VertexDecoder::Step_NormalFloat, &VertexDecoderJitCache::Jit_NormalFloat},
|
||||
|
||||
@ -956,6 +957,11 @@ void VertexDecoderJitCache::Jit_NormalS8() {
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.nrmoff), R(tempReg1));
|
||||
}
|
||||
|
||||
// JIT counterpart of VertexDecoder::Step_NormalS8ToFloat (the two are paired
// in jitLookup).
void VertexDecoderJitCache::Jit_NormalS8ToFloat() {
|
||||
// Presumably loads the s8 components at the source normal offset and leaves
// the converted floats in XMM3 - confirm against Jit_AnyS8ToFloat.
Jit_AnyS8ToFloat(dec_->nrmoff);
|
||||
// NOTE(review): MOVUPS stores a full 16-byte register, i.e. four floats,
// while the interpreter step writes only three. Confirm the extra 4 bytes
// at decFmt.nrmoff + 12 are padding or are overwritten by a later step.
MOVUPS(MDisp(dstReg, dec_->decFmt.nrmoff), XMM3);
|
||||
}
|
||||
|
||||
// Copy 6 bytes and then 2 zeroes.
|
||||
void VertexDecoderJitCache::Jit_NormalS16() {
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->nrmoff));
|
||||
|
@ -147,6 +147,8 @@ TransformDrawEngineDX9::TransformDrawEngineDX9()
|
||||
|
||||
memset(&decOptions_, 0, sizeof(decOptions_));
|
||||
decOptions_.expandAllUVtoFloat = true;
|
||||
decOptions_.expandAllWeightsToFloat = true;
|
||||
decOptions_.expand8BitNormalsToFloat = true;
|
||||
|
||||
decimationCounter_ = VERTEXCACHE_DECIMATION_INTERVAL;
|
||||
// Allocate nicely aligned memory. Maybe graphics drivers will
|
||||
|
@ -312,9 +312,6 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
|
||||
} else {
|
||||
int numWeights = TranslateNumBonesDX9(vertTypeGetNumBoneWeights(vertType));
|
||||
|
||||
static const char *rescale[4] = {"", " * 1.9921875", " * 1.999969482421875", ""}; // 2*127.5f/128.f, 2*32767.5f/32768.f, 1.0f};
|
||||
const char *factor = rescale[vertTypeGetWeightMask(vertType) >> GE_VTYPE_WEIGHT_SHIFT];
|
||||
|
||||
static const char * const boneWeightAttr[8] = {
|
||||
"a_w1.x", "a_w1.y", "a_w1.z", "a_w1.w",
|
||||
"a_w2.x", "a_w2.y", "a_w2.z", "a_w2.w",
|
||||
@ -377,11 +374,11 @@ void GenerateVertexShaderDX9(int prim, char *buffer, bool useHWTransform) {
|
||||
WRITE(p, ";\n");
|
||||
|
||||
// Trying to simplify this results in bugs in LBP...
|
||||
WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix).xyz %s;\n", factor);
|
||||
WRITE(p, " float3 skinnedpos = mul(float4(In.position.xyz, 1.0), skinMatrix).xyz;\n");
|
||||
WRITE(p, " float3 worldpos = mul(float4(skinnedpos, 1.0), u_world).xyz;\n");
|
||||
|
||||
if (hasNormal) {
|
||||
WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix).xyz %s;\n", flipNormal ? "-" : "", factor);
|
||||
WRITE(p, " float3 skinnednormal = mul(float4(%sIn.normal, 0.0), skinMatrix).xyz;\n", flipNormal ? "-" : "");
|
||||
WRITE(p, " float3 worldnormal = normalize(mul(float4(skinnednormal, 0.0), u_world).xyz);\n");
|
||||
} else {
|
||||
WRITE(p, " float3 worldnormal = mul( mul( float4(0.0, 0.0, 1.0, 0.0), skinMatrix), u_world).xyz;\n");
|
||||
|
Loading…
Reference in New Issue
Block a user