VertexDecoder: Remove member function pointers from decoding

This commit is contained in:
Henrik Rydgård 2024-07-22 14:06:15 +02:00
parent eda60a5df9
commit 43c68c4277
2 changed files with 311 additions and 356 deletions

View File

@ -190,46 +190,42 @@ void PrintDecodedVertex(const VertexReader &vtx) {
printf("P: %f %f %f\n", pos[0], pos[1], pos[2]); printf("P: %f %f %f\n", pos[0], pos[1], pos[2]);
} }
void VertexDecoder::Step_WeightsU8() const void VertexDecoder::Step_WeightsU8(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u8 *wt = (u8 *)(decoded + dec->decFmt.w0off);
u8 *wt = (u8 *)(decoded_ + decFmt.w0off); const u8 *wdata = (const u8*)(ptr);
const u8 *wdata = (const u8*)(ptr_);
int j; int j;
for (j = 0; j < nweights; j++) for (j = 0; j < dec->nweights; j++)
wt[j] = wdata[j]; wt[j] = wdata[j];
while (j & 3) // Zero additional weights rounding up to 4. while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0; wt[j++] = 0;
} }
void VertexDecoder::Step_WeightsU16() const void VertexDecoder::Step_WeightsU16(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u16 *wt = (u16 *)(decoded + dec->decFmt.w0off);
u16 *wt = (u16 *)(decoded_ + decFmt.w0off); const u16_le *wdata = (const u16_le *)(ptr);
const u16_le *wdata = (const u16_le *)(ptr_);
int j; int j;
for (j = 0; j < nweights; j++) for (j = 0; j < dec->nweights; j++)
wt[j] = wdata[j]; wt[j] = wdata[j];
while (j & 3) // Zero additional weights rounding up to 4. while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0; wt[j++] = 0;
} }
void VertexDecoder::Step_WeightsU8ToFloat() const void VertexDecoder::Step_WeightsU8ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *wt = (float *)(decoded + dec->decFmt.w0off);
float *wt = (float *)(decoded_ + decFmt.w0off); const u8 *wdata = (const u8*)(ptr);
const u8 *wdata = (const u8*)(ptr_);
int j; int j;
for (j = 0; j < nweights; j++) { for (j = 0; j < dec->nweights; j++) {
wt[j] = (float)wdata[j] * (1.0f / 128.0f); wt[j] = (float)wdata[j] * (1.0f / 128.0f);
} }
while (j & 3) // Zero additional weights rounding up to 4. while (j & 3) // Zero additional weights rounding up to 4.
wt[j++] = 0; wt[j++] = 0;
} }
void VertexDecoder::Step_WeightsU16ToFloat() const void VertexDecoder::Step_WeightsU16ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *wt = (float *)(decoded + dec->decFmt.w0off);
float *wt = (float *)(decoded_ + decFmt.w0off); const u16_le *wdata = (const u16_le *)(ptr);
const u16_le *wdata = (const u16_le *)(ptr_);
int j; int j;
for (j = 0; j < nweights; j++) { for (j = 0; j < dec->nweights; j++) {
wt[j] = (float)wdata[j] * (1.0f / 32768.0f); wt[j] = (float)wdata[j] * (1.0f / 32768.0f);
} }
while (j & 3) // Zero additional weights rounding up to 4. while (j & 3) // Zero additional weights rounding up to 4.
@ -239,12 +235,11 @@ void VertexDecoder::Step_WeightsU16ToFloat() const
// Float weights should be uncommon, we can live with having to multiply these by 2.0 // Float weights should be uncommon, we can live with having to multiply these by 2.0
// to avoid special checks in the vertex shader generator. // to avoid special checks in the vertex shader generator.
// (PSP uses 0.0-2.0 fixed point numbers for weights) // (PSP uses 0.0-2.0 fixed point numbers for weights)
void VertexDecoder::Step_WeightsFloat() const void VertexDecoder::Step_WeightsFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *wt = (float *)(decoded + dec->decFmt.w0off);
float *wt = (float *)(decoded_ + decFmt.w0off); const float_le *wdata = (const float_le *)(ptr);
const float_le *wdata = (const float_le *)(ptr_);
int j; int j;
for (j = 0; j < nweights; j++) { for (j = 0; j < dec->nweights; j++) {
wt[j] = wdata[j]; wt[j] = wdata[j];
} }
while (j & 3) // Zero additional weights rounding up to 4. while (j & 3) // Zero additional weights rounding up to 4.
@ -298,56 +293,52 @@ void VertexDecoder::ComputeSkinMatrix(const float weights[8]) const {
#endif #endif
} }
void VertexDecoder::Step_WeightsU8Skin() const { void VertexDecoder::Step_WeightsU8Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
const u8 *wdata = (const u8*)(ptr_); const u8 *wdata = (const u8*)(ptr);
float weights[8]; float weights[8];
for (int j = 0; j < nweights; j++) for (int j = 0; j < dec->nweights; j++)
weights[j] = wdata[j] * (1.0f / 128.0f); weights[j] = wdata[j] * (1.0f / 128.0f);
ComputeSkinMatrix(weights); dec->ComputeSkinMatrix(weights);
} }
void VertexDecoder::Step_WeightsU16Skin() const { void VertexDecoder::Step_WeightsU16Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
const u16_le *wdata = (const u16_le *)(ptr_); const u16_le *wdata = (const u16_le *)(ptr);
float weights[8]; float weights[8];
for (int j = 0; j < nweights; j++) for (int j = 0; j < dec->nweights; j++)
weights[j] = wdata[j] * (1.0f / 32768.0f); weights[j] = wdata[j] * (1.0f / 32768.0f);
ComputeSkinMatrix(weights); dec->ComputeSkinMatrix(weights);
} }
void VertexDecoder::Step_WeightsFloatSkin() const { void VertexDecoder::Step_WeightsFloatSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
const float_le *wdata = (const float_le *)(ptr_); const float_le *wdata = (const float_le *)(ptr);
ComputeSkinMatrix(wdata); dec->ComputeSkinMatrix(wdata);
} }
void VertexDecoder::Step_TcU8ToFloat() const void VertexDecoder::Step_TcU8ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{
// u32 to write two bytes of zeroes for free. // u32 to write two bytes of zeroes for free.
float *uv = (float *)(decoded_ + decFmt.uvoff); float *uv = (float *)(decoded + dec->decFmt.uvoff);
const u8 *uvdata = (const u8*)(ptr_ + tcoff); const u8 *uvdata = (const u8*)(ptr + dec->tcoff);
uv[0] = uvdata[0] * (1.0f / 128.0f); uv[0] = uvdata[0] * (1.0f / 128.0f);
uv[1] = uvdata[1] * (1.0f / 128.0f); uv[1] = uvdata[1] * (1.0f / 128.0f);
} }
void VertexDecoder::Step_TcU16ToFloat() const void VertexDecoder::Step_TcU16ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *uv = (float *)(decoded + dec->decFmt.uvoff);
float *uv = (float *)(decoded_ + decFmt.uvoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->tcoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
uv[0] = uvdata[0] * (1.0f / 32768.0f); uv[0] = uvdata[0] * (1.0f / 32768.0f);
uv[1] = uvdata[1] * (1.0f / 32768.0f); uv[1] = uvdata[1] * (1.0f / 32768.0f);
} }
void VertexDecoder::Step_TcU16DoubleToFloat() const void VertexDecoder::Step_TcU16DoubleToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *uv = (float*)(decoded + dec->decFmt.uvoff);
float *uv = (float*)(decoded_ + decFmt.uvoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->tcoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
uv[0] = uvdata[0] * (1.0f / 16384.0f); uv[0] = uvdata[0] * (1.0f / 16384.0f);
uv[1] = uvdata[1] * (1.0f / 16384.0f); uv[1] = uvdata[1] * (1.0f / 16384.0f);
} }
void VertexDecoder::Step_TcU16ThroughToFloat() const void VertexDecoder::Step_TcU16ThroughToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *uv = (float *)(decoded + dec->decFmt.uvoff);
float *uv = (float *)(decoded_ + decFmt.uvoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->tcoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
uv[0] = uvdata[0]; uv[0] = uvdata[0];
uv[1] = uvdata[1]; uv[1] = uvdata[1];
@ -357,26 +348,23 @@ void VertexDecoder::Step_TcU16ThroughToFloat() const
gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, (u16)uvdata[1]); gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, (u16)uvdata[1]);
} }
void VertexDecoder::Step_TcU16ThroughDoubleToFloat() const void VertexDecoder::Step_TcU16ThroughDoubleToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *uv = (float *)(decoded + dec->decFmt.uvoff);
float *uv = (float *)(decoded_ + decFmt.uvoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->tcoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
uv[0] = uvdata[0] * 2; uv[0] = uvdata[0] * 2;
uv[1] = uvdata[1] * 2; uv[1] = uvdata[1] * 2;
} }
void VertexDecoder::Step_TcFloat() const void VertexDecoder::Step_TcFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *uv = (float *)(decoded + dec->decFmt.uvoff);
float *uv = (float *)(decoded_ + decFmt.uvoff); const float_le *uvdata = (const float_le *)(ptr + dec->tcoff);
const float_le *uvdata = (const float_le *)(ptr_ + tcoff);
uv[0] = uvdata[0]; uv[0] = uvdata[0];
uv[1] = uvdata[1]; uv[1] = uvdata[1];
} }
void VertexDecoder::Step_TcFloatThrough() const void VertexDecoder::Step_TcFloatThrough(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *uv = (float *)(decoded + dec->decFmt.uvoff);
float *uv = (float *)(decoded_ + decFmt.uvoff); const float_le *uvdata = (const float_le *)(ptr + dec->tcoff);
const float_le *uvdata = (const float_le *)(ptr_ + tcoff);
uv[0] = uvdata[0]; uv[0] = uvdata[0];
uv[1] = uvdata[1]; uv[1] = uvdata[1];
@ -386,170 +374,167 @@ void VertexDecoder::Step_TcFloatThrough() const
gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, (u16)uvdata[1]); gstate_c.vertBounds.maxV = std::max(gstate_c.vertBounds.maxV, (u16)uvdata[1]);
} }
void VertexDecoder::Step_TcU8Prescale() const { void VertexDecoder::Step_TcU8Prescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *uv = (float *)(decoded_ + decFmt.uvoff); float *uv = (float *)(decoded + dec->decFmt.uvoff);
const u8 *uvdata = (const u8 *)(ptr_ + tcoff); const u8 *uvdata = (const u8 *)(ptr + dec->tcoff);
uv[0] = (float)uvdata[0] * (1.f / 128.f) * prescaleUV_->uScale + prescaleUV_->uOff; uv[0] = (float)uvdata[0] * (1.f / 128.f) * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
uv[1] = (float)uvdata[1] * (1.f / 128.f) * prescaleUV_->vScale + prescaleUV_->vOff; uv[1] = (float)uvdata[1] * (1.f / 128.f) * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcU16Prescale() const { void VertexDecoder::Step_TcU16Prescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *uv = (float *)(decoded_ + decFmt.uvoff); float *uv = (float *)(decoded + dec->decFmt.uvoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->tcoff);
uv[0] = (float)uvdata[0] * (1.f / 32768.f) * prescaleUV_->uScale + prescaleUV_->uOff; uv[0] = (float)uvdata[0] * (1.f / 32768.f) * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
uv[1] = (float)uvdata[1] * (1.f / 32768.f) * prescaleUV_->vScale + prescaleUV_->vOff; uv[1] = (float)uvdata[1] * (1.f / 32768.f) * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcU16DoublePrescale() const { void VertexDecoder::Step_TcU16DoublePrescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *uv = (float *)(decoded_ + decFmt.uvoff); float *uv = (float *)(decoded + dec->decFmt.uvoff);
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->tcoff);
uv[0] = (float)uvdata[0] * (1.f / 16384.f) * prescaleUV_->uScale + prescaleUV_->uOff; uv[0] = (float)uvdata[0] * (1.f / 16384.f) * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
uv[1] = (float)uvdata[1] * (1.f / 16384.f) * prescaleUV_->vScale + prescaleUV_->vOff; uv[1] = (float)uvdata[1] * (1.f / 16384.f) * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcFloatPrescale() const { void VertexDecoder::Step_TcFloatPrescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *uv = (float *)(decoded_ + decFmt.uvoff); float *uv = (float *)(decoded + dec->decFmt.uvoff);
const float_le *uvdata = (const float_le *)(ptr_ + tcoff); const float_le *uvdata = (const float_le *)(ptr + dec->tcoff);
uv[0] = uvdata[0] * prescaleUV_->uScale + prescaleUV_->uOff; uv[0] = uvdata[0] * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
uv[1] = uvdata[1] * prescaleUV_->vScale + prescaleUV_->vOff; uv[1] = uvdata[1] * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcU8MorphToFloat() const { void VertexDecoder::Step_TcU8MorphToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff); const u8 *uvdata = (const u8 *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * (1.f / 128.f) * w; uv[0] += (float)uvdata[0] * (1.f / 128.f) * w;
uv[1] += (float)uvdata[1] * (1.f / 128.f) * w; uv[1] += (float)uvdata[1] * (1.f / 128.f) * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0]; out[0] = uv[0];
out[1] = uv[1]; out[1] = uv[1];
} }
void VertexDecoder::Step_TcU16MorphToFloat() const { void VertexDecoder::Step_TcU16MorphToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * (1.f / 32768.f) * w; uv[0] += (float)uvdata[0] * (1.f / 32768.f) * w;
uv[1] += (float)uvdata[1] * (1.f / 32768.f) * w; uv[1] += (float)uvdata[1] * (1.f / 32768.f) * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0]; out[0] = uv[0];
out[1] = uv[1]; out[1] = uv[1];
} }
void VertexDecoder::Step_TcU16DoubleMorphToFloat() const { void VertexDecoder::Step_TcU16DoubleMorphToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * (1.f / 16384.f) * w; uv[0] += (float)uvdata[0] * (1.f / 16384.f) * w;
uv[1] += (float)uvdata[1] * (1.f / 16384.f) * w; uv[1] += (float)uvdata[1] * (1.f / 16384.f) * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0]; out[0] = uv[0];
out[1] = uv[1]; out[1] = uv[1];
} }
void VertexDecoder::Step_TcFloatMorph() const { void VertexDecoder::Step_TcFloatMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const float_le *uvdata = (const float_le *)(ptr_ + onesize_*n + tcoff); const float_le *uvdata = (const float_le *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * w; uv[0] += (float)uvdata[0] * w;
uv[1] += (float)uvdata[1] * w; uv[1] += (float)uvdata[1] * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0]; out[0] = uv[0];
out[1] = uv[1]; out[1] = uv[1];
} }
void VertexDecoder::Step_TcU8PrescaleMorph() const { void VertexDecoder::Step_TcU8PrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff); const u8 *uvdata = (const u8 *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * (1.f / 128.f) * w; uv[0] += (float)uvdata[0] * (1.f / 128.f) * w;
uv[1] += (float)uvdata[1] * (1.f / 128.f) * w; uv[1] += (float)uvdata[1] * (1.f / 128.f) * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0] * prescaleUV_->uScale + prescaleUV_->uOff; out[0] = uv[0] * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
out[1] = uv[1] * prescaleUV_->vScale + prescaleUV_->vOff; out[1] = uv[1] * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcU16PrescaleMorph() const { void VertexDecoder::Step_TcU16PrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * (1.f / 32768.f) * w; uv[0] += (float)uvdata[0] * (1.f / 32768.f) * w;
uv[1] += (float)uvdata[1] * (1.f / 32768.f) * w; uv[1] += (float)uvdata[1] * (1.f / 32768.f) * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0] * prescaleUV_->uScale + prescaleUV_->uOff; out[0] = uv[0] * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
out[1] = uv[1] * prescaleUV_->vScale + prescaleUV_->vOff; out[1] = uv[1] * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcU16DoublePrescaleMorph() const { void VertexDecoder::Step_TcU16DoublePrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff); const u16_le *uvdata = (const u16_le *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * (1.f / 16384.f) * w; uv[0] += (float)uvdata[0] * (1.f / 16384.f) * w;
uv[1] += (float)uvdata[1] * (1.f / 16384.f) * w; uv[1] += (float)uvdata[1] * (1.f / 16384.f) * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0] * prescaleUV_->uScale + prescaleUV_->uOff; out[0] = uv[0] * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
out[1] = uv[1] * prescaleUV_->vScale + prescaleUV_->vOff; out[1] = uv[1] * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_TcFloatPrescaleMorph() const { void VertexDecoder::Step_TcFloatPrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float uv[2] = { 0, 0 }; float uv[2] = { 0, 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const float_le *uvdata = (const float_le *)(ptr_ + onesize_*n + tcoff); const float_le *uvdata = (const float_le *)(ptr + dec->onesize_*n + dec->tcoff);
uv[0] += (float)uvdata[0] * w; uv[0] += (float)uvdata[0] * w;
uv[1] += (float)uvdata[1] * w; uv[1] += (float)uvdata[1] * w;
} }
float *out = (float *)(decoded_ + decFmt.uvoff); float *out = (float *)(decoded + dec->decFmt.uvoff);
out[0] = uv[0] * prescaleUV_->uScale + prescaleUV_->uOff; out[0] = uv[0] * dec->prescaleUV_->uScale + dec->prescaleUV_->uOff;
out[1] = uv[1] * prescaleUV_->vScale + prescaleUV_->vOff; out[1] = uv[1] * dec->prescaleUV_->vScale + dec->prescaleUV_->vOff;
} }
void VertexDecoder::Step_ColorInvalid() const void VertexDecoder::Step_ColorInvalid(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{
// Do nothing. This is only here to prevent crashes. // Do nothing. This is only here to prevent crashes.
} }
void VertexDecoder::Step_Color565() const void VertexDecoder::Step_Color565(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u16 cdata = *(const u16_le *)(ptr + dec->coloff);
u16 cdata = *(const u16_le *)(ptr_ + coloff); u32 *c = (u32 *)(decoded + dec->decFmt.c0off);
u32 *c = (u32 *)(decoded_ + decFmt.c0off);
*c = RGB565ToRGBA8888(cdata); *c = RGB565ToRGBA8888(cdata);
} }
void VertexDecoder::Step_Color5551() const void VertexDecoder::Step_Color5551(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u16 cdata = *(const u16_le *)(ptr + dec->coloff);
u16 cdata = *(const u16_le *)(ptr_ + coloff); u32 *c = (u32 *)(decoded + dec->decFmt.c0off);
u32 *c = (u32 *)(decoded_ + decFmt.c0off);
int alpha = (cdata >> 15); int alpha = (cdata >> 15);
if (!alpha) { if (!alpha) {
gstate_c.vertexFullAlpha = false; gstate_c.vertexFullAlpha = false;
@ -557,37 +542,34 @@ void VertexDecoder::Step_Color5551() const
*c = RGBA5551ToRGBA8888(cdata); *c = RGBA5551ToRGBA8888(cdata);
} }
void VertexDecoder::Step_Color4444() const void VertexDecoder::Step_Color4444(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u16 cdata = *(const u16_le *)(ptr + dec->coloff);
u16 cdata = *(const u16_le *)(ptr_ + coloff); u32 *c = (u32 *)(decoded + dec->decFmt.c0off);
u32 *c = (u32 *)(decoded_ + decFmt.c0off);
if ((cdata >> 12) != 0xF) { if ((cdata >> 12) != 0xF) {
gstate_c.vertexFullAlpha = false; gstate_c.vertexFullAlpha = false;
} }
*c = RGBA4444ToRGBA8888(cdata); *c = RGBA4444ToRGBA8888(cdata);
} }
void VertexDecoder::Step_Color8888() const void VertexDecoder::Step_Color8888(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u8 *c = decoded + dec->decFmt.c0off;
u8 *c = decoded_ + decFmt.c0off; const u8 *cdata = (const u8*)(ptr + dec->coloff);
const u8 *cdata = (const u8*)(ptr_ + coloff);
if (cdata[3] != 255) { if (cdata[3] != 255) {
gstate_c.vertexFullAlpha = false; gstate_c.vertexFullAlpha = false;
} }
memcpy(c, cdata, sizeof(u8) * 4); memcpy(c, cdata, sizeof(u8) * 4);
} }
void VertexDecoder::Step_Color565Morph() const void VertexDecoder::Step_Color565Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{
float col[3] = { 0 }; float col[3] = { 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
u16 cdata = *(const u16_le *)(ptr_ + onesize_*n + coloff); u16 cdata = *(const u16_le *)(ptr + dec->onesize_*n + dec->coloff);
col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f); col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f);
col[1] += w * ((cdata >> 5) & 0x3f) * (255.0f / 63.0f); col[1] += w * ((cdata >> 5) & 0x3f) * (255.0f / 63.0f);
col[2] += w * ((cdata >> 11) & 0x1f) * (255.0f / 31.0f); col[2] += w * ((cdata >> 11) & 0x1f) * (255.0f / 31.0f);
} }
u8 *c = decoded_ + decFmt.c0off; u8 *c = decoded + dec->decFmt.c0off;
for (int i = 0; i < 3; i++) { for (int i = 0; i < 3; i++) {
c[i] = clamp_u8((int)col[i]); c[i] = clamp_u8((int)col[i]);
} }
@ -595,155 +577,142 @@ void VertexDecoder::Step_Color565Morph() const
// Always full alpha. (Is this true??) // Always full alpha. (Is this true??)
} }
void VertexDecoder::Step_Color5551Morph() const void VertexDecoder::Step_Color5551Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{
float col[4] = { 0 }; float col[4] = { 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
u16 cdata = *(const u16_le *)(ptr_ + onesize_*n + coloff); u16 cdata = *(const u16_le *)(ptr + dec->onesize_*n + dec->coloff);
col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f); col[0] += w * (cdata & 0x1f) * (255.0f / 31.0f);
col[1] += w * ((cdata >> 5) & 0x1f) * (255.0f / 31.0f); col[1] += w * ((cdata >> 5) & 0x1f) * (255.0f / 31.0f);
col[2] += w * ((cdata >> 10) & 0x1f) * (255.0f / 31.0f); col[2] += w * ((cdata >> 10) & 0x1f) * (255.0f / 31.0f);
col[3] += w * ((cdata >> 15) ? 255.0f : 0.0f); col[3] += w * ((cdata >> 15) ? 255.0f : 0.0f);
} }
u8 *c = decoded_ + decFmt.c0off; u8 *c = decoded + dec->decFmt.c0off;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
c[i] = clamp_u8((int)col[i]); c[i] = clamp_u8((int)col[i]);
} }
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (int)col[3] >= 255; gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (int)col[3] >= 255;
} }
void VertexDecoder::Step_Color4444Morph() const void VertexDecoder::Step_Color4444Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{
float col[4] = { 0 }; float col[4] = { 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
u16 cdata = *(const u16_le *)(ptr_ + onesize_*n + coloff); u16 cdata = *(const u16_le *)(ptr + dec->onesize_*n + dec->coloff);
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f); col[j] += w * ((cdata >> (j * 4)) & 0xF) * (255.0f / 15.0f);
} }
u8 *c = decoded_ + decFmt.c0off; u8 *c = decoded + dec->decFmt.c0off;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
c[i] = clamp_u8((int)col[i]); c[i] = clamp_u8((int)col[i]);
} }
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (int)col[3] >= 255; gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (int)col[3] >= 255;
} }
void VertexDecoder::Step_Color8888Morph() const void VertexDecoder::Step_Color8888Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{
float col[4] = { 0 }; float col[4] = { 0 };
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float w = gstate_c.morphWeights[n]; float w = gstate_c.morphWeights[n];
const u8 *cdata = (const u8*)(ptr_ + onesize_*n + coloff); const u8 *cdata = (const u8*)(ptr + dec->onesize_*n + dec->coloff);
for (int j = 0; j < 4; j++) for (int j = 0; j < 4; j++)
col[j] += w * cdata[j]; col[j] += w * cdata[j];
} }
u8 *c = decoded_ + decFmt.c0off; u8 *c = decoded + dec->decFmt.c0off;
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
c[i] = clamp_u8((int)col[i]); c[i] = clamp_u8((int)col[i]);
} }
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (int)col[3] >= 255; gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (int)col[3] >= 255;
} }
void VertexDecoder::Step_NormalS8() const void VertexDecoder::Step_NormalS8(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ s8 *normal = (s8 *)(decoded + dec->decFmt.nrmoff);
s8 *normal = (s8 *)(decoded_ + decFmt.nrmoff); const s8 *sv = (const s8*)(ptr + dec->nrmoff);
const s8 *sv = (const s8*)(ptr_ + nrmoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
normal[j] = sv[j]; normal[j] = sv[j];
normal[3] = 0; normal[3] = 0;
} }
void VertexDecoder::Step_NormalS8ToFloat() const void VertexDecoder::Step_NormalS8ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff); const s8 *sv = (const s8*)(ptr + dec->nrmoff);
const s8 *sv = (const s8*)(ptr_ + nrmoff);
normal[0] = (float)sv[0] * (1.0f / 128.0f); normal[0] = (float)sv[0] * (1.0f / 128.0f);
normal[1] = (float)sv[1] * (1.0f / 128.0f); normal[1] = (float)sv[1] * (1.0f / 128.0f);
normal[2] = (float)sv[2] * (1.0f / 128.0f); normal[2] = (float)sv[2] * (1.0f / 128.0f);
} }
void VertexDecoder::Step_NormalS16() const void VertexDecoder::Step_NormalS16(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ s16 *normal = (s16 *)(decoded + dec->decFmt.nrmoff);
s16 *normal = (s16 *)(decoded_ + decFmt.nrmoff); const s16_le *sv = (const s16_le *)(ptr + dec->nrmoff);
const s16_le *sv = (const s16_le *)(ptr_ + nrmoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
normal[j] = sv[j]; normal[j] = sv[j];
normal[3] = 0; normal[3] = 0;
} }
void VertexDecoder::Step_NormalFloat() const void VertexDecoder::Step_NormalFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u32 *normal = (u32 *)(decoded + dec->decFmt.nrmoff);
u32 *normal = (u32 *)(decoded_ + decFmt.nrmoff); const u32_le *fv = (const u32_le *)(ptr + dec->nrmoff);
const u32_le *fv = (const u32_le *)(ptr_ + nrmoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
normal[j] = fv[j]; normal[j] = fv[j];
} }
void VertexDecoder::Step_NormalS8Skin() const void VertexDecoder::Step_NormalS8Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff); const s8 *sv = (const s8*)(ptr + dec->nrmoff);
const s8 *sv = (const s8*)(ptr_ + nrmoff);
const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * (1.0f / 128.0f) }; const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * (1.0f / 128.0f) };
Norm3ByMatrix43(normal, fn, skinMatrix); Norm3ByMatrix43(normal, fn, skinMatrix);
} }
void VertexDecoder::Step_NormalS16Skin() const void VertexDecoder::Step_NormalS16Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff); const s16_le *sv = (const s16_le *)(ptr + dec->nrmoff);
const s16_le *sv = (const s16_le *)(ptr_ + nrmoff);
const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) }; const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) };
Norm3ByMatrix43(normal, fn, skinMatrix); Norm3ByMatrix43(normal, fn, skinMatrix);
} }
void VertexDecoder::Step_NormalFloatSkin() const void VertexDecoder::Step_NormalFloatSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff); const float_le *fn = (const float_le *)(ptr + dec->nrmoff);
const float_le *fn = (const float_le *)(ptr_ + nrmoff);
Norm3ByMatrix43(normal, fn, skinMatrix); Norm3ByMatrix43(normal, fn, skinMatrix);
} }
void VertexDecoder::Step_NormalS8Morph() const void VertexDecoder::Step_NormalS8Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff);
memset(normal, 0, sizeof(float) * 3); memset(normal, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const s8 *bv = (const s8*)(ptr_ + onesize_*n + nrmoff); const s8 *bv = (const s8*)(ptr + dec->onesize_*n + dec->nrmoff);
const float multiplier = gstate_c.morphWeights[n] * (1.0f / 128.0f); const float multiplier = gstate_c.morphWeights[n] * (1.0f / 128.0f);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
normal[j] += bv[j] * multiplier; normal[j] += bv[j] * multiplier;
} }
} }
void VertexDecoder::Step_NormalS16Morph() const void VertexDecoder::Step_NormalS16Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff);
memset(normal, 0, sizeof(float) * 3); memset(normal, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const s16_le *sv = (const s16_le *)(ptr_ + onesize_*n + nrmoff); const s16_le *sv = (const s16_le *)(ptr + dec->onesize_*n + dec->nrmoff);
const float multiplier = gstate_c.morphWeights[n] * (1.0f / 32768.0f); const float multiplier = gstate_c.morphWeights[n] * (1.0f / 32768.0f);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
normal[j] += sv[j] * multiplier; normal[j] += sv[j] * multiplier;
} }
} }
void VertexDecoder::Step_NormalFloatMorph() const void VertexDecoder::Step_NormalFloatMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float *normal = (float *)(decoded_ + decFmt.nrmoff);
memset(normal, 0, sizeof(float) * 3); memset(normal, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float multiplier = gstate_c.morphWeights[n]; float multiplier = gstate_c.morphWeights[n];
const float_le *fv = (const float_le *)(ptr_ + onesize_*n + nrmoff); const float_le *fv = (const float_le *)(ptr + dec->onesize_*n + dec->nrmoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
normal[j] += fv[j] * multiplier; normal[j] += fv[j] * multiplier;
} }
} }
void VertexDecoder::Step_NormalS8MorphSkin() const { void VertexDecoder::Step_NormalS8MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *normal = (float *)(decoded_ + decFmt.nrmoff); float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float nrm[3]{}; float nrm[3]{};
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const s8 *bv = (const s8*)(ptr_ + onesize_ * n + nrmoff); const s8 *bv = (const s8*)(ptr + dec->onesize_ * n + dec->nrmoff);
const float multiplier = gstate_c.morphWeights[n] * (1.0f / 128.0f); const float multiplier = gstate_c.morphWeights[n] * (1.0f / 128.0f);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
nrm[j] += bv[j] * multiplier; nrm[j] += bv[j] * multiplier;
@ -751,11 +720,11 @@ void VertexDecoder::Step_NormalS8MorphSkin() const {
Norm3ByMatrix43(normal, nrm, skinMatrix); Norm3ByMatrix43(normal, nrm, skinMatrix);
} }
void VertexDecoder::Step_NormalS16MorphSkin() const { void VertexDecoder::Step_NormalS16MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *normal = (float *)(decoded_ + decFmt.nrmoff); float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float nrm[3]{}; float nrm[3]{};
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const s16_le *sv = (const s16_le *)(ptr_ + onesize_ * n + nrmoff); const s16_le *sv = (const s16_le *)(ptr + dec->onesize_ * n + dec->nrmoff);
const float multiplier = gstate_c.morphWeights[n] * (1.0f / 32768.0f); const float multiplier = gstate_c.morphWeights[n] * (1.0f / 32768.0f);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
nrm[j] += sv[j] * multiplier; nrm[j] += sv[j] * multiplier;
@ -763,162 +732,151 @@ void VertexDecoder::Step_NormalS16MorphSkin() const {
Norm3ByMatrix43(normal, nrm, skinMatrix); Norm3ByMatrix43(normal, nrm, skinMatrix);
} }
void VertexDecoder::Step_NormalFloatMorphSkin() const { void VertexDecoder::Step_NormalFloatMorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *normal = (float *)(decoded_ + decFmt.nrmoff); float *normal = (float *)(decoded + dec->decFmt.nrmoff);
float nrm[3]{}; float nrm[3]{};
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
float multiplier = gstate_c.morphWeights[n]; float multiplier = gstate_c.morphWeights[n];
const float_le *fv = (const float_le *)(ptr_ + onesize_ * n + nrmoff); const float_le *fv = (const float_le *)(ptr + dec->onesize_ * n + dec->nrmoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
nrm[j] += fv[j] * multiplier; nrm[j] += fv[j] * multiplier;
} }
Norm3ByMatrix43(normal, nrm, skinMatrix); Norm3ByMatrix43(normal, nrm, skinMatrix);
} }
void VertexDecoder::Step_PosS8() const void VertexDecoder::Step_PosS8(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *pos = (float *)(decoded + dec->decFmt.posoff);
float *pos = (float *)(decoded_ + decFmt.posoff); const s8 *sv = (const s8*)(ptr + dec->posoff);
const s8 *sv = (const s8*)(ptr_ + posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pos[j] = sv[j] * (1.0f / 128.0f); pos[j] = sv[j] * (1.0f / 128.0f);
} }
void VertexDecoder::Step_PosS16() const void VertexDecoder::Step_PosS16(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *pos = (float *)(decoded + dec->decFmt.posoff);
float *pos = (float *)(decoded_ + decFmt.posoff); const s16_le *sv = (const s16_le *)(ptr + dec->posoff);
const s16_le *sv = (const s16_le *)(ptr_ + posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pos[j] = sv[j] * (1.0f / 32768.0f); pos[j] = sv[j] * (1.0f / 32768.0f);
} }
void VertexDecoder::Step_PosFloat() const void VertexDecoder::Step_PosFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ u8 *v = (u8 *)(decoded + dec->decFmt.posoff);
u8 *v = (u8 *)(decoded_ + decFmt.posoff); const u8 *fv = (const u8*)(ptr + dec->posoff);
const u8 *fv = (const u8*)(ptr_ + posoff);
memcpy(v, fv, 12); memcpy(v, fv, 12);
} }
void VertexDecoder::Step_PosS8Skin() const void VertexDecoder::Step_PosS8Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *pos = (float *)(decoded + dec->decFmt.posoff);
float *pos = (float *)(decoded_ + decFmt.posoff); const s8 *sv = (const s8*)(ptr + dec->posoff);
const s8 *sv = (const s8*)(ptr_ + posoff);
const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * (1.0f / 128.0f) }; const float fn[3] = { sv[0] * (1.0f / 128.0f), sv[1] * (1.0f / 128.0f), sv[2] * (1.0f / 128.0f) };
Vec3ByMatrix43(pos, fn, skinMatrix); Vec3ByMatrix43(pos, fn, skinMatrix);
} }
void VertexDecoder::Step_PosS16Skin() const void VertexDecoder::Step_PosS16Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *pos = (float *)(decoded + dec->decFmt.posoff);
float *pos = (float *)(decoded_ + decFmt.posoff); const s16_le *sv = (const s16_le *)(ptr + dec->posoff);
const s16_le *sv = (const s16_le *)(ptr_ + posoff);
const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) }; const float fn[3] = { sv[0] * (1.0f / 32768.0f), sv[1] * (1.0f / 32768.0f), sv[2] * (1.0f / 32768.0f) };
Vec3ByMatrix43(pos, fn, skinMatrix); Vec3ByMatrix43(pos, fn, skinMatrix);
} }
void VertexDecoder::Step_PosFloatSkin() const void VertexDecoder::Step_PosFloatSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *pos = (float *)(decoded + dec->decFmt.posoff);
float *pos = (float *)(decoded_ + decFmt.posoff); const float_le *fn = (const float_le *)(ptr + dec->posoff);
const float_le *fn = (const float_le *)(ptr_ + posoff);
Vec3ByMatrix43(pos, fn, skinMatrix); Vec3ByMatrix43(pos, fn, skinMatrix);
} }
void VertexDecoder::Step_PosInvalid() const { void VertexDecoder::Step_PosInvalid(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
// Invalid positions are just culled. Simulate by forcing invalid values. // Invalid positions are just culled. Simulate by forcing invalid values.
float *v = (float *)(decoded_ + decFmt.posoff); float *v = (float *)(decoded + dec->decFmt.posoff);
v[0] = std::numeric_limits<float>::infinity(); v[0] = std::numeric_limits<float>::infinity();
v[1] = std::numeric_limits<float>::infinity(); v[1] = std::numeric_limits<float>::infinity();
v[2] = std::numeric_limits<float>::infinity(); v[2] = std::numeric_limits<float>::infinity();
} }
void VertexDecoder::Step_PosS8Through() const { void VertexDecoder::Step_PosS8Through(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
// 8-bit positions in throughmode always decode to 0, depth included. // 8-bit positions in throughmode always decode to 0, depth included.
float *v = (float *)(decoded_ + decFmt.posoff); float *v = (float *)(decoded + dec->decFmt.posoff);
v[0] = 0; v[0] = 0;
v[1] = 0; v[1] = 0;
v[2] = 0; v[2] = 0;
} }
void VertexDecoder::Step_PosS16Through() const void VertexDecoder::Step_PosS16Through(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *v = (float *)(decoded + dec->decFmt.posoff);
float *v = (float *)(decoded_ + decFmt.posoff); const s16_le *sv = (const s16_le *)(ptr + dec->posoff);
const s16_le *sv = (const s16_le *)(ptr_ + posoff); const u16_le *uv = (const u16_le *)(ptr + dec->posoff);
const u16_le *uv = (const u16_le *)(ptr_ + posoff);
v[0] = sv[0]; v[0] = sv[0];
v[1] = sv[1]; v[1] = sv[1];
v[2] = uv[2]; v[2] = uv[2];
} }
void VertexDecoder::Step_PosFloatThrough() const void VertexDecoder::Step_PosFloatThrough(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *v = (float *)(decoded + dec->decFmt.posoff);
float *v = (float *)(decoded_ + decFmt.posoff); const float *fv = (const float *)(ptr + dec->posoff);
const float *fv = (const float *)(ptr_ + posoff);
memcpy(v, fv, 8); memcpy(v, fv, 8);
v[2] = fv[2] > 65535.0f ? 65535.0f : (fv[2] < 0.0f ? 0.0f : fv[2]); v[2] = fv[2] > 65535.0f ? 65535.0f : (fv[2] < 0.0f ? 0.0f : fv[2]);
} }
void VertexDecoder::Step_PosS8Morph() const void VertexDecoder::Step_PosS8Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *v = (float *)(decoded + dec->decFmt.posoff);
float *v = (float *)(decoded_ + decFmt.posoff);
memset(v, 0, sizeof(float) * 3); memset(v, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const float multiplier = 1.0f / 128.0f; const float multiplier = 1.0f / 128.0f;
const s8 *sv = (const s8*)(ptr_ + onesize_*n + posoff); const s8 *sv = (const s8*)(ptr + dec->onesize_*n + dec->posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]);
} }
} }
void VertexDecoder::Step_PosS16Morph() const void VertexDecoder::Step_PosS16Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *v = (float *)(decoded + dec->decFmt.posoff);
float *v = (float *)(decoded_ + decFmt.posoff);
memset(v, 0, sizeof(float) * 3); memset(v, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const float multiplier = 1.0f / 32768.0f; const float multiplier = 1.0f / 32768.0f;
const s16_le *sv = (const s16_le *)(ptr_ + onesize_*n + posoff); const s16_le *sv = (const s16_le *)(ptr + dec->onesize_*n + dec->posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); v[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]);
} }
} }
void VertexDecoder::Step_PosFloatMorph() const void VertexDecoder::Step_PosFloatMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
{ float *v = (float *)(decoded + dec->decFmt.posoff);
float *v = (float *)(decoded_ + decFmt.posoff);
memset(v, 0, sizeof(float) * 3); memset(v, 0, sizeof(float) * 3);
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const float_le *fv = (const float_le *)(ptr_ + onesize_*n + posoff); const float_le *fv = (const float_le *)(ptr + dec->onesize_*n + dec->posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
v[j] += fv[j] * gstate_c.morphWeights[n]; v[j] += fv[j] * gstate_c.morphWeights[n];
} }
} }
void VertexDecoder::Step_PosS8MorphSkin() const { void VertexDecoder::Step_PosS8MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *v = (float *)(decoded_ + decFmt.posoff); float *v = (float *)(decoded + dec->decFmt.posoff);
float pos[3]{}; float pos[3]{};
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const float multiplier = 1.0f / 128.0f; const float multiplier = 1.0f / 128.0f;
const s8 *sv = (const s8*)(ptr_ + onesize_ * n + posoff); const s8 *sv = (const s8*)(ptr + dec->onesize_ * n + dec->posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pos[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); pos[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]);
} }
Vec3ByMatrix43(v, pos, skinMatrix); Vec3ByMatrix43(v, pos, skinMatrix);
} }
void VertexDecoder::Step_PosS16MorphSkin() const { void VertexDecoder::Step_PosS16MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *v = (float *)(decoded_ + decFmt.posoff); float *v = (float *)(decoded + dec->decFmt.posoff);
float pos[3]{}; float pos[3]{};
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const float multiplier = 1.0f / 32768.0f; const float multiplier = 1.0f / 32768.0f;
const s16_le *sv = (const s16_le *)(ptr_ + onesize_ * n + posoff); const s16_le *sv = (const s16_le *)(ptr + dec->onesize_ * n + dec->posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pos[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]); pos[j] += (float)sv[j] * (multiplier * gstate_c.morphWeights[n]);
} }
Vec3ByMatrix43(v, pos, skinMatrix); Vec3ByMatrix43(v, pos, skinMatrix);
} }
void VertexDecoder::Step_PosFloatMorphSkin() const { void VertexDecoder::Step_PosFloatMorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded) {
float *v = (float *)(decoded_ + decFmt.posoff); float *v = (float *)(decoded + dec->decFmt.posoff);
float pos[3]{}; float pos[3]{};
for (int n = 0; n < morphcount; n++) { for (int n = 0; n < dec->morphcount; n++) {
const float_le *fv = (const float_le *)(ptr_ + onesize_ * n + posoff); const float_le *fv = (const float_le *)(ptr + dec->onesize_ * n + dec->posoff);
for (int j = 0; j < 3; j++) for (int j = 0; j < 3; j++)
pos[j] += fv[j] * gstate_c.morphWeights[n]; pos[j] += fv[j] * gstate_c.morphWeights[n];
} }
@ -1369,19 +1327,18 @@ void VertexDecoder::DecodeVerts(u8 *decodedptr, const void *verts, const UVScale
// We've compiled the steps into optimized machine code, so just jump! // We've compiled the steps into optimized machine code, so just jump!
jitted_(startPtr, decodedptr, count, uvScaleOffset); jitted_(startPtr, decodedptr, count, uvScaleOffset);
} else { } else {
ptr_ = startPtr; const u8 *ptr = startPtr;
decoded_ = decodedptr; u8 *decoded = decodedptr;
prescaleUV_ = uvScaleOffset; prescaleUV_ = uvScaleOffset;
// Interpret the decode steps // Interpret the decode steps
for (; count; count--) { for (; count; count--) {
const int steps = numSteps_; const int steps = numSteps_;
for (int i = 0; i < steps; i++) { for (int i = 0; i < steps; i++) {
(this->*steps_[i])(); steps_[i](this, ptr, decoded);
} }
ptr_ += size; ptr += size;
decoded_ += stride; decoded += stride;
} }
if (jitted_ && validateJit) { if (jitted_ && validateJit) {
CompareToJit(startPtr, decodedptr, indexUpperBound - indexLowerBound + 1, uvScaleOffset); CompareToJit(startPtr, decodedptr, indexUpperBound - indexLowerBound + 1, uvScaleOffset);
} }

View File

@ -309,7 +309,7 @@ void PrintDecodedVertex(const VertexReader &vtx);
class VertexDecoder; class VertexDecoder;
class VertexDecoderJitCache; class VertexDecoderJitCache;
typedef void (VertexDecoder::*StepFunction)() const; typedef void (*StepFunction)(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
typedef void (VertexDecoderJitCache::*JitStepFunction)(); typedef void (VertexDecoderJitCache::*JitStepFunction)();
struct JitLookup { struct JitLookup {
@ -343,89 +343,89 @@ public:
std::string GetString(DebugShaderStringType stringType) const; std::string GetString(DebugShaderStringType stringType) const;
void Step_WeightsU8() const;
void Step_WeightsU16() const;
void Step_WeightsU8ToFloat() const;
void Step_WeightsU16ToFloat() const;
void Step_WeightsFloat() const;
void ComputeSkinMatrix(const float weights[8]) const; void ComputeSkinMatrix(const float weights[8]) const;
void Step_WeightsU8Skin() const; static void Step_WeightsU8(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_WeightsU16Skin() const; static void Step_WeightsU16(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_WeightsFloatSkin() const; static void Step_WeightsU8ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_WeightsU16ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_WeightsFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU8ToFloat() const; static void Step_WeightsU8Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16ToFloat() const; static void Step_WeightsU16Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcFloat() const; static void Step_WeightsFloatSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU8Prescale() const; static void Step_TcU8ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16Prescale() const; static void Step_TcU16ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16DoublePrescale() const; static void Step_TcFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcFloatPrescale() const;
void Step_TcU16DoubleToFloat() const; static void Step_TcU8Prescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16ThroughToFloat() const; static void Step_TcU16Prescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16ThroughDoubleToFloat() const; static void Step_TcU16DoublePrescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcFloatThrough() const; static void Step_TcFloatPrescale(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU8MorphToFloat() const; static void Step_TcU16DoubleToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16MorphToFloat() const; static void Step_TcU16ThroughToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU16DoubleMorphToFloat() const; static void Step_TcU16ThroughDoubleToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcFloatMorph() const; static void Step_TcFloatThrough(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_TcU8PrescaleMorph() const;
void Step_TcU16PrescaleMorph() const;
void Step_TcU16DoublePrescaleMorph() const;
void Step_TcFloatPrescaleMorph() const;
void Step_ColorInvalid() const; static void Step_TcU8MorphToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color4444() const; static void Step_TcU16MorphToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color565() const; static void Step_TcU16DoubleMorphToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color5551() const; static void Step_TcFloatMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color8888() const; static void Step_TcU8PrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_TcU16PrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_TcU16DoublePrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_TcFloatPrescaleMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color4444Morph() const; static void Step_ColorInvalid(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color565Morph() const; static void Step_Color4444(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color5551Morph() const; static void Step_Color565(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_Color8888Morph() const; static void Step_Color5551(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_Color8888(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS8() const; static void Step_Color4444Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS8ToFloat() const; static void Step_Color565Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS16() const; static void Step_Color5551Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalFloat() const; static void Step_Color8888Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS8Skin() const; static void Step_NormalS8(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS16Skin() const; static void Step_NormalS8ToFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalFloatSkin() const; static void Step_NormalS16(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_NormalFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS8Morph() const; static void Step_NormalS8Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS16Morph() const; static void Step_NormalS16Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalFloatMorph() const; static void Step_NormalFloatSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS8MorphSkin() const; static void Step_NormalS8Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalS16MorphSkin() const; static void Step_NormalS16Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_NormalFloatMorphSkin() const; static void Step_NormalFloatMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS8() const; static void Step_NormalS8MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS16() const; static void Step_NormalS16MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosFloat() const; static void Step_NormalFloatMorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS8Skin() const; static void Step_PosS8(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS16Skin() const; static void Step_PosS16(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosFloatSkin() const; static void Step_PosFloat(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS8Morph() const; static void Step_PosS8Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS16Morph() const; static void Step_PosS16Skin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosFloatMorph() const; static void Step_PosFloatSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS8MorphSkin() const; static void Step_PosS8Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS16MorphSkin() const; static void Step_PosS16Morph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosFloatMorphSkin() const; static void Step_PosFloatMorph(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosInvalid() const; static void Step_PosS8MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS8Through() const; static void Step_PosS16MorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosS16Through() const; static void Step_PosFloatMorphSkin(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
void Step_PosFloatThrough() const;
static void Step_PosInvalid(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_PosS8Through(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_PosS16Through(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
static void Step_PosFloatThrough(const VertexDecoder *dec, const u8 *ptr, u8 *decoded);
// output must be big for safety. // output must be big for safety.
// Returns number of chars written. // Returns number of chars written.
@ -437,8 +437,6 @@ public:
} }
// Mutable decoder state // Mutable decoder state
mutable u8 *decoded_ = nullptr;
mutable const u8 *ptr_ = nullptr;
mutable const UVScale *prescaleUV_ = nullptr; mutable const UVScale *prescaleUV_ = nullptr;
JittedVertexDecoder jitted_ = 0; JittedVertexDecoder jitted_ = 0;
int32_t jittedSize_ = 0; int32_t jittedSize_ = 0;