mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-13 21:29:40 +00:00
Merge pull request #8699 from unknownbrackets/morph
Apply morphing to texcoord values
This commit is contained in:
commit
08ec95ae4f
@ -385,11 +385,18 @@ void VertexDecoder::Step_TcU8Prescale() const {
|
||||
|
||||
void VertexDecoder::Step_TcU16Prescale() const {
|
||||
float *uv = (float *)(decoded_ + decFmt.uvoff);
|
||||
const u16 *uvdata = (const u16_le *)(ptr_ + tcoff);
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
|
||||
uv[0] = (float)uvdata[0] * (1.f / 32768.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
|
||||
uv[1] = (float)uvdata[1] * (1.f / 32768.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16DoublePrescale() const {
|
||||
float *uv = (float *)(decoded_ + decFmt.uvoff);
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + tcoff);
|
||||
uv[0] = (float)uvdata[0] * (1.f / 16384.f) * gstate_c.uv.uScale + gstate_c.uv.uOff;
|
||||
uv[1] = (float)uvdata[1] * (1.f / 16384.f) * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcFloatPrescale() const {
|
||||
float *uv = (float *)(decoded_ + decFmt.uvoff);
|
||||
const float *uvdata = (const float*)(ptr_ + tcoff);
|
||||
@ -397,6 +404,171 @@ void VertexDecoder::Step_TcFloatPrescale() const {
|
||||
uv[1] = uvdata[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU8Morph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * w;
|
||||
uv[1] += (float)uvdata[1] * w;
|
||||
}
|
||||
|
||||
u8 *out = decoded_ + decFmt.uvoff;
|
||||
out[0] = (int)uv[0];
|
||||
out[1] = (int)uv[1];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16Morph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * w;
|
||||
uv[1] += (float)uvdata[1] * w;
|
||||
}
|
||||
|
||||
u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = (int)uv[0];
|
||||
out[1] = (int)uv[1];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16DoubleMorph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * w;
|
||||
uv[1] += (float)uvdata[1] * w;
|
||||
}
|
||||
|
||||
u16_le *out = (u16_le *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = (int)(uv[0] * 2.0f);
|
||||
out[1] = (int)(uv[1] * 2.0f);
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU8MorphToFloat() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * (1.f / 128.f) * w;
|
||||
uv[1] += (float)uvdata[1] * (1.f / 128.f) * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0];
|
||||
out[1] = uv[1];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16MorphToFloat() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * (1.f / 32768.f) * w;
|
||||
uv[1] += (float)uvdata[1] * (1.f / 32768.f) * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0];
|
||||
out[1] = uv[1];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16DoubleMorphToFloat() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * (1.f / 16384.f) * w;
|
||||
uv[1] += (float)uvdata[1] * (1.f / 16384.f) * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0];
|
||||
out[1] = uv[1];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcFloatMorph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const float_le *uvdata = (const float_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * w;
|
||||
uv[1] += (float)uvdata[1] * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0];
|
||||
out[1] = uv[1];
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU8PrescaleMorph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u8 *uvdata = (const u8 *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * (1.f / 128.f) * w;
|
||||
uv[1] += (float)uvdata[1] * (1.f / 128.f) * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
|
||||
out[1] = uv[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16PrescaleMorph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * (1.f / 32768.f) * w;
|
||||
uv[1] += (float)uvdata[1] * (1.f / 32768.f) * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
|
||||
out[1] = uv[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcU16DoublePrescaleMorph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const u16_le *uvdata = (const u16_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * (1.f / 16384.f) * w;
|
||||
uv[1] += (float)uvdata[1] * (1.f / 16384.f) * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
|
||||
out[1] = uv[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_TcFloatPrescaleMorph() const {
|
||||
float uv[2] = { 0, 0 };
|
||||
for (int n = 0; n < morphcount; n++) {
|
||||
float w = gstate_c.morphWeights[n];
|
||||
const float_le *uvdata = (const float_le *)(ptr_ + onesize_*n + tcoff);
|
||||
|
||||
uv[0] += (float)uvdata[0] * w;
|
||||
uv[1] += (float)uvdata[1] * w;
|
||||
}
|
||||
|
||||
float *out = (float *)(decoded_ + decFmt.uvoff);
|
||||
out[0] = uv[0] * gstate_c.uv.uScale + gstate_c.uv.uOff;
|
||||
out[1] = uv[1] * gstate_c.uv.vScale + gstate_c.uv.vOff;
|
||||
}
|
||||
|
||||
void VertexDecoder::Step_ColorInvalid() const
|
||||
{
|
||||
// Do nothing. This is only here to prevent crashes.
|
||||
@ -752,6 +924,55 @@ static const StepFunction tcstep_prescale[4] = {
|
||||
&VertexDecoder::Step_TcFloatPrescale,
|
||||
};
|
||||
|
||||
// Prescaled texcoord steps for the doubled-u16 (remaster) case, indexed by
// `tc`. Slot 0 is empty; slots 1-3 are the u8, u16 and float handlers.
static const StepFunction tcstep_prescale_remaster[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8Prescale,
	&VertexDecoder::Step_TcU16DoublePrescale,
	&VertexDecoder::Step_TcFloatPrescale,
};
|
||||
|
||||
// Prescaled texcoord steps used when morphing, indexed by `tc`.
// Slot 0 is empty; slots 1-3 are the u8, u16 and float handlers.
static const StepFunction tcstep_prescale_morph[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8PrescaleMorph,
	&VertexDecoder::Step_TcU16PrescaleMorph,
	&VertexDecoder::Step_TcFloatPrescaleMorph,
};
|
||||
|
||||
// Prescaled morph texcoord steps for the doubled-u16 (remaster) case,
// indexed by `tc`. Only the u16 slot differs from tcstep_prescale_morph.
static const StepFunction tcstep_prescale_morph_remaster[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8PrescaleMorph,
	&VertexDecoder::Step_TcU16DoublePrescaleMorph,
	&VertexDecoder::Step_TcFloatPrescaleMorph,
};
|
||||
|
||||
// Morph texcoord steps that keep the source component format, indexed by
// `tc`. Slot 0 is empty; slots 1-3 are the u8, u16 and float handlers.
static const StepFunction tcstep_morph[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8Morph,
	&VertexDecoder::Step_TcU16Morph,
	&VertexDecoder::Step_TcFloatMorph,
};
|
||||
|
||||
// Morph texcoord steps for the doubled-u16 (remaster) case, indexed by `tc`.
// Only the u16 slot differs from tcstep_morph.
static const StepFunction tcstep_morph_remaster[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8Morph,
	&VertexDecoder::Step_TcU16DoubleMorph,
	&VertexDecoder::Step_TcFloatMorph,
};
|
||||
|
||||
// Morph texcoord steps that always produce float output, indexed by `tc`.
// Slot 0 is empty; the float slot reuses the plain float morph step.
static const StepFunction tcstep_morphToFloat[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8MorphToFloat,
	&VertexDecoder::Step_TcU16MorphToFloat,
	&VertexDecoder::Step_TcFloatMorph,
};
|
||||
|
||||
// Float-output morph texcoord steps for the doubled-u16 (remaster) case,
// indexed by `tc`. Only the u16 slot differs from tcstep_morphToFloat.
static const StepFunction tcstep_morph_remasterToFloat[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8MorphToFloat,
	&VertexDecoder::Step_TcU16DoubleMorphToFloat,
	&VertexDecoder::Step_TcFloatMorph,
};
|
||||
|
||||
static const StepFunction tcstep_through[4] = {
|
||||
0,
|
||||
&VertexDecoder::Step_TcU8,
|
||||
@ -767,37 +988,34 @@ static const StepFunction tcstep_throughToFloat[4] = {
|
||||
};
|
||||
|
||||
// Some HD Remaster games double the u16 texture coordinates.
|
||||
static const StepFunction tcstep_Remaster[4] = {
|
||||
static const StepFunction tcstep_remaster[4] = {
|
||||
0,
|
||||
&VertexDecoder::Step_TcU8,
|
||||
&VertexDecoder::Step_TcU16Double,
|
||||
&VertexDecoder::Step_TcFloat,
|
||||
};
|
||||
|
||||
// Float-output texcoord steps for the doubled-u16 (remaster) case, indexed
// by `tc`; slot 0 is empty. Declared once, under the lowercase name used by
// the step selection code.
static const StepFunction tcstep_remasterToFloat[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8ToFloat,
	&VertexDecoder::Step_TcU16DoubleToFloat,
	&VertexDecoder::Step_TcFloat,
};
|
||||
|
||||
// Through-mode texcoord steps for the doubled-u16 (remaster) case, indexed
// by `tc`; slot 0 is empty. Declared once, under the lowercase name used by
// the step selection code.
static const StepFunction tcstep_through_remaster[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8,
	&VertexDecoder::Step_TcU16ThroughDouble,
	&VertexDecoder::Step_TcFloatThrough,
};
|
||||
|
||||
// Float-output through-mode texcoord steps for the doubled-u16 (remaster)
// case, indexed by `tc`; slot 0 is empty. Declared once, under the lowercase
// name used by the step selection code.
static const StepFunction tcstep_through_remasterToFloat[4] = {
	nullptr,
	&VertexDecoder::Step_TcU8ToFloat,
	&VertexDecoder::Step_TcU16ThroughDoubleToFloat,
	&VertexDecoder::Step_TcFloatThrough,
};
|
||||
|
||||
|
||||
// TODO: Tc Morph
|
||||
|
||||
static const StepFunction colstep[8] = {
|
||||
0,
|
||||
&VertexDecoder::Step_ColorInvalid,
|
||||
@ -955,19 +1173,26 @@ void VertexDecoder::SetVertexType(u32 fmt, const VertexDecoderOptions &options,
|
||||
biggest = tcalign[tc];
|
||||
|
||||
// NOTE: That we check getUVGenMode here means that we must include it in the decoder ID!
|
||||
if (g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == 0 || gstate.getUVGenMode() == 3)) {
|
||||
steps_[numSteps_++] = tcstep_prescale[tc];
|
||||
if (g_Config.bPrescaleUV && !throughmode && (gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_COORDS || gstate.getUVGenMode() == GE_TEXMAP_UNKNOWN)) {
|
||||
if (g_DoubleTextureCoordinates)
|
||||
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale_remaster[tc] : tcstep_prescale_morph_remaster[tc];
|
||||
else
|
||||
steps_[numSteps_++] = morphcount == 1 ? tcstep_prescale[tc] : tcstep_prescale_morph[tc];
|
||||
decFmt.uvfmt = DEC_FLOAT_2;
|
||||
} else {
|
||||
if (options.expandAllUVtoFloat) {
|
||||
if (g_DoubleTextureCoordinates)
|
||||
steps_[numSteps_++] = throughmode ? tcstep_through_RemasterToFloat[tc] : tcstep_RemasterToFloat[tc];
|
||||
if (morphcount != 1 && !throughmode)
|
||||
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remasterToFloat[tc] : tcstep_morphToFloat[tc];
|
||||
else if (g_DoubleTextureCoordinates)
|
||||
steps_[numSteps_++] = throughmode ? tcstep_through_remasterToFloat[tc] : tcstep_remasterToFloat[tc];
|
||||
else
|
||||
steps_[numSteps_++] = throughmode ? tcstep_throughToFloat[tc] : tcstepToFloat[tc];
|
||||
decFmt.uvfmt = DEC_FLOAT_2;
|
||||
} else {
|
||||
if (g_DoubleTextureCoordinates)
|
||||
steps_[numSteps_++] = throughmode ? tcstep_through_Remaster[tc] : tcstep_Remaster[tc];
|
||||
if (morphcount != 1 && !throughmode)
|
||||
steps_[numSteps_++] = g_DoubleTextureCoordinates ? tcstep_morph_remaster[tc] : tcstep_morph[tc];
|
||||
else if (g_DoubleTextureCoordinates)
|
||||
steps_[numSteps_++] = throughmode ? tcstep_through_remaster[tc] : tcstep_remaster[tc];
|
||||
else
|
||||
steps_[numSteps_++] = throughmode ? tcstep_through[tc] : tcstep[tc];
|
||||
|
||||
|
@ -484,6 +484,7 @@ public:
|
||||
|
||||
void Step_TcU8Prescale() const;
|
||||
void Step_TcU16Prescale() const;
|
||||
void Step_TcU16DoublePrescale() const;
|
||||
void Step_TcFloatPrescale() const;
|
||||
|
||||
void Step_TcU16Double() const;
|
||||
@ -494,6 +495,18 @@ public:
|
||||
void Step_TcU16ThroughDoubleToFloat() const;
|
||||
void Step_TcFloatThrough() const;
|
||||
|
||||
void Step_TcU8Morph() const;
|
||||
void Step_TcU16Morph() const;
|
||||
void Step_TcU16DoubleMorph() const;
|
||||
void Step_TcU8MorphToFloat() const;
|
||||
void Step_TcU16MorphToFloat() const;
|
||||
void Step_TcU16DoubleMorphToFloat() const;
|
||||
void Step_TcFloatMorph() const;
|
||||
void Step_TcU8PrescaleMorph() const;
|
||||
void Step_TcU16PrescaleMorph() const;
|
||||
void Step_TcU16DoublePrescaleMorph() const;
|
||||
void Step_TcFloatPrescaleMorph() const;
|
||||
|
||||
void Step_ColorInvalid() const;
|
||||
void Step_Color4444() const;
|
||||
void Step_Color565() const;
|
||||
@ -627,6 +640,14 @@ public:
|
||||
void Jit_TcU16Prescale();
|
||||
void Jit_TcFloatPrescale();
|
||||
|
||||
void Jit_TcAnyMorph(int bits);
|
||||
void Jit_TcU8MorphToFloat();
|
||||
void Jit_TcU16MorphToFloat();
|
||||
void Jit_TcFloatMorph();
|
||||
void Jit_TcU8PrescaleMorph();
|
||||
void Jit_TcU16PrescaleMorph();
|
||||
void Jit_TcFloatPrescaleMorph();
|
||||
|
||||
void Jit_TcU16Double();
|
||||
void Jit_TcU16ThroughDouble();
|
||||
|
||||
|
@ -105,6 +105,13 @@ static const JitLookup jitLookup[] = {
|
||||
{&VertexDecoder::Step_TcU16Prescale, &VertexDecoderJitCache::Jit_TcU16Prescale},
|
||||
{&VertexDecoder::Step_TcFloatPrescale, &VertexDecoderJitCache::Jit_TcFloatPrescale},
|
||||
|
||||
{&VertexDecoder::Step_TcU8MorphToFloat, &VertexDecoderJitCache::Jit_TcU8MorphToFloat},
|
||||
{&VertexDecoder::Step_TcU16MorphToFloat, &VertexDecoderJitCache::Jit_TcU16MorphToFloat},
|
||||
{&VertexDecoder::Step_TcFloatMorph, &VertexDecoderJitCache::Jit_TcFloatMorph},
|
||||
{&VertexDecoder::Step_TcU8PrescaleMorph, &VertexDecoderJitCache::Jit_TcU8PrescaleMorph},
|
||||
{&VertexDecoder::Step_TcU16PrescaleMorph, &VertexDecoderJitCache::Jit_TcU16PrescaleMorph},
|
||||
{&VertexDecoder::Step_TcFloatPrescaleMorph, &VertexDecoderJitCache::Jit_TcFloatPrescaleMorph},
|
||||
|
||||
{&VertexDecoder::Step_TcU16Through, &VertexDecoderJitCache::Jit_TcU16Through},
|
||||
{&VertexDecoder::Step_TcU16ThroughToFloat, &VertexDecoderJitCache::Jit_TcU16ThroughToFloat},
|
||||
{&VertexDecoder::Step_TcFloatThrough, &VertexDecoderJitCache::Jit_TcFloatThrough},
|
||||
@ -185,6 +192,11 @@ JittedVertexDecoder VertexDecoderJitCache::Compile(const VertexDecoder &dec, int
|
||||
dec.steps_[i] == &VertexDecoder::Step_TcFloatPrescale) {
|
||||
prescaleStep = true;
|
||||
}
|
||||
if (dec.steps_[i] == &VertexDecoder::Step_TcU8PrescaleMorph ||
|
||||
dec.steps_[i] == &VertexDecoder::Step_TcU16PrescaleMorph ||
|
||||
dec.steps_[i] == &VertexDecoder::Step_TcFloatPrescaleMorph) {
|
||||
prescaleStep = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Add code to convert matrices to 4x4.
|
||||
@ -747,6 +759,105 @@ void VertexDecoderJitCache::Jit_TcFloatPrescale() {
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcAnyMorph(int bits) {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.morphWeights[0]));
|
||||
if (!cpu_info.bSSE4_1) {
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
}
|
||||
|
||||
bool first = true;
|
||||
for (int n = 0; n < dec_->morphcount; ++n) {
|
||||
const X64Reg reg = first ? fpScratchReg : fpScratchReg2;
|
||||
const OpArg src = MDisp(srcReg, dec_->onesize_ * n + dec_->tcoff);
|
||||
|
||||
// Load the actual values and convert to float.
|
||||
if (bits == 32) {
|
||||
// Two floats: just load as a MOVQ.
|
||||
MOVQ_xmm(reg, src);
|
||||
} else {
|
||||
if (bits == 8) {
|
||||
MOVZX(32, 16, tempReg2, src);
|
||||
MOVD_xmm(reg, R(tempReg2));
|
||||
} else {
|
||||
MOVD_xmm(reg, src);
|
||||
}
|
||||
if (cpu_info.bSSE4_1) {
|
||||
if (bits == 8) {
|
||||
PMOVZXBD(reg, R(reg));
|
||||
} else {
|
||||
PMOVZXWD(reg, R(reg));
|
||||
}
|
||||
} else {
|
||||
if (bits == 8) {
|
||||
PUNPCKLBW(reg, R(fpScratchReg4));
|
||||
}
|
||||
PUNPCKLWD(reg, R(fpScratchReg4));
|
||||
}
|
||||
|
||||
CVTDQ2PS(reg, R(reg));
|
||||
}
|
||||
|
||||
// And now scale by the weight.
|
||||
MOVSS(fpScratchReg3, MDisp(tempReg1, n * sizeof(float)));
|
||||
SHUFPS(fpScratchReg3, R(fpScratchReg3), _MM_SHUFFLE(0, 0, 0, 0));
|
||||
MULPS(reg, R(fpScratchReg3));
|
||||
|
||||
if (!first) {
|
||||
ADDPS(fpScratchReg, R(fpScratchReg2));
|
||||
} else {
|
||||
first = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU8MorphToFloat() {
|
||||
Jit_TcAnyMorph(8);
|
||||
// They were all added (weighted) pre-normalize, we normalize once here.
|
||||
MULPS(fpScratchReg, M(&by128));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16MorphToFloat() {
|
||||
Jit_TcAnyMorph(16);
|
||||
// They were all added (weighted) pre-normalize, we normalize once here.
|
||||
MULPS(fpScratchReg, M(&by32768));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcFloatMorph() {
|
||||
Jit_TcAnyMorph(32);
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU8PrescaleMorph() {
|
||||
Jit_TcAnyMorph(8);
|
||||
// The scale takes into account the u8 normalization.
|
||||
MULPS(fpScratchReg, R(fpScaleOffsetReg));
|
||||
SHUFPS(fpScaleOffsetReg, R(fpScaleOffsetReg), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
ADDPS(fpScratchReg, R(fpScaleOffsetReg));
|
||||
SHUFPS(fpScaleOffsetReg, R(fpScaleOffsetReg), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16PrescaleMorph() {
|
||||
Jit_TcAnyMorph(16);
|
||||
// The scale takes into account the u16 normalization.
|
||||
MULPS(fpScratchReg, R(fpScaleOffsetReg));
|
||||
SHUFPS(fpScaleOffsetReg, R(fpScaleOffsetReg), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
ADDPS(fpScratchReg, R(fpScaleOffsetReg));
|
||||
SHUFPS(fpScaleOffsetReg, R(fpScaleOffsetReg), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcFloatPrescaleMorph() {
|
||||
Jit_TcAnyMorph(32);
|
||||
MULPS(fpScratchReg, R(fpScaleOffsetReg));
|
||||
SHUFPS(fpScaleOffsetReg, R(fpScaleOffsetReg), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
ADDPS(fpScratchReg, R(fpScaleOffsetReg));
|
||||
SHUFPS(fpScaleOffsetReg, R(fpScaleOffsetReg), _MM_SHUFFLE(1, 0, 3, 2));
|
||||
MOVQ_xmm(MDisp(dstReg, dec_->decFmt.uvoff), fpScratchReg);
|
||||
}
|
||||
|
||||
void VertexDecoderJitCache::Jit_TcU16Through() {
|
||||
MOV(32, R(tempReg1), MDisp(srcReg, dec_->tcoff));
|
||||
MOV(32, MDisp(dstReg, dec_->decFmt.uvoff), R(tempReg1));
|
||||
@ -960,7 +1071,9 @@ void VertexDecoderJitCache::Jit_Color5551() {
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color8888Morph() {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.morphWeights[0]));
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
if (!cpu_info.bSSE4_1) {
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
}
|
||||
|
||||
bool first = true;
|
||||
for (int n = 0; n < dec_->morphcount; ++n) {
|
||||
@ -994,7 +1107,9 @@ static const float MEMORY_ALIGNED16(byColor4444[4]) = { 255.0f / 15.0f, 255.0f /
|
||||
|
||||
void VertexDecoderJitCache::Jit_Color4444Morph() {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.morphWeights[0]));
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
if (!cpu_info.bSSE4_1) {
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
}
|
||||
MOVDQA(XMM5, M(color4444mask));
|
||||
MOVAPS(XMM6, M(byColor4444));
|
||||
|
||||
@ -1376,7 +1491,9 @@ void VertexDecoderJitCache::Jit_AnyU16ToFloat(int srcoff, u32 bits) {
|
||||
|
||||
void VertexDecoderJitCache::Jit_AnyS8Morph(int srcoff, int dstoff) {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.morphWeights[0]));
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
if (!cpu_info.bSSE4_1) {
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
}
|
||||
MOVAPS(XMM5, M(by128));
|
||||
|
||||
// Sum into fpScratchReg.
|
||||
@ -1414,7 +1531,9 @@ void VertexDecoderJitCache::Jit_AnyS8Morph(int srcoff, int dstoff) {
|
||||
|
||||
void VertexDecoderJitCache::Jit_AnyS16Morph(int srcoff, int dstoff) {
|
||||
MOV(PTRBITS, R(tempReg1), ImmPtr(&gstate_c.morphWeights[0]));
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
if (!cpu_info.bSSE4_1) {
|
||||
PXOR(fpScratchReg4, R(fpScratchReg4));
|
||||
}
|
||||
MOVAPS(XMM5, M(by32768));
|
||||
|
||||
// Sum into fpScratchReg.
|
||||
|
Loading…
x
Reference in New Issue
Block a user