mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-25 01:00:01 +00:00
vertexjit: Fix a silly mistake in weights > 4.
Darn switch, took me way too long to notice this.
This commit is contained in:
parent
717e6db3a7
commit
b589d3b170
@ -1634,7 +1634,7 @@ void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)
|
||||
_assert_msg_(JIT, false, "VMOV_neon unsupported size");
|
||||
}
|
||||
|
||||
if (Vd < S0 && Rt >= D0)
|
||||
if (Vd < S0 && Rt >= D0 && Rt < Q0)
|
||||
{
|
||||
// Oh, reading to reg, our params are backwards.
|
||||
ARMReg Src = Rt;
|
||||
@ -1645,7 +1645,7 @@ void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)
|
||||
|
||||
Write32(condition | (0xE1 << 20) | U | (opc1 << 21) | EncodeVn(Src) | (Dest << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));
|
||||
}
|
||||
else if (Rt < S0 && Vd >= D0)
|
||||
else if (Rt < S0 && Vd >= D0 && Vd < Q0)
|
||||
{
|
||||
ARMReg Src = Rt;
|
||||
ARMReg Dest = Vd;
|
||||
|
@ -430,16 +430,13 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
if (NEONSkinning) {
|
||||
// Weight is first so srcReg is correct.
|
||||
switch (dec_->nweights) {
|
||||
case 1: LDRB(scratchReg2, srcReg, 0); break;
|
||||
case 2: LDRH(scratchReg2, srcReg, 0); break;
|
||||
case 3:
|
||||
case 4:
|
||||
case 1: VLD1_lane(I_8, neonScratchReg, srcReg, 0, false); break;
|
||||
case 2: VLD1_lane(I_16, neonScratchReg, srcReg, 0, false); break;
|
||||
default:
|
||||
// For 3, we over read, for over 4, we read more later.
|
||||
VLD1_lane(I_32, neonScratchReg, srcReg, 0, false);
|
||||
break;
|
||||
}
|
||||
if (dec_->nweights == 1 || dec_->nweights == 2) {
|
||||
VMOV_neon(I_32, neonScratchReg, scratchReg2, 0);
|
||||
}
|
||||
// This can be represented as a constant.
|
||||
VMOV_neon(F_32, Q3, by128);
|
||||
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
|
||||
@ -450,16 +447,13 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
if (dec_->nweights > 4) {
|
||||
ADD(tempReg1, srcReg, 4 * sizeof(u8));
|
||||
switch (dec_->nweights) {
|
||||
case 5: LDRB(scratchReg2, tempReg1, 0); break;
|
||||
case 6: LDRH(scratchReg2, tempReg1, 0); break;
|
||||
case 5: VLD1_lane(I_8, neonScratchReg, tempReg1, 0, false); break;
|
||||
case 6: VLD1_lane(I_16, neonScratchReg, tempReg1, 0, false); break;
|
||||
case 7:
|
||||
case 8:
|
||||
VLD1_lane(I_32, neonScratchReg, tempReg1, 0, false);
|
||||
break;
|
||||
}
|
||||
if (dec_->nweights == 5 || dec_->nweights == 6) {
|
||||
VMOV_neon(I_32, neonScratchReg, scratchReg2, 0);
|
||||
}
|
||||
VMOVL(I_8 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
|
||||
VMOVL(I_16 | I_UNSIGNED, neonScratchRegQ, neonScratchReg);
|
||||
VCVT(F_32 | I_UNSIGNED, neonScratchRegQ, neonScratchRegQ);
|
||||
@ -480,13 +474,10 @@ void VertexDecoderJitCache::Jit_WeightsU8Skin() {
|
||||
void VertexDecoderJitCache::Jit_WeightsU16Skin() {
|
||||
if (NEONSkinning) {
|
||||
switch (dec_->nweights) {
|
||||
case 1:
|
||||
LDRH(scratchReg, srcReg, 0);
|
||||
VMOV_neon(I_32, neonScratchReg, scratchReg, 0);
|
||||
break;
|
||||
case 1: VLD1_lane(I_16, neonScratchReg, srcReg, 0, true); break;
|
||||
case 2: VLD1_lane(I_32, neonScratchReg, srcReg, 0, false); break;
|
||||
case 3:
|
||||
case 4:
|
||||
default:
|
||||
// For 3, we over read, for over 4, we read more later.
|
||||
VLD1(I_32, neonScratchReg, srcReg, 1, ALIGN_NONE);
|
||||
break;
|
||||
}
|
||||
@ -499,10 +490,7 @@ void VertexDecoderJitCache::Jit_WeightsU16Skin() {
|
||||
if (dec_->nweights > 4) {
|
||||
ADD(tempReg1, srcReg, 4 * sizeof(u16));
|
||||
switch (dec_->nweights) {
|
||||
case 5:
|
||||
LDRH(scratchReg, tempReg1, 0);
|
||||
VMOV_neon(I_32, neonScratchReg, scratchReg, 0);
|
||||
break;
|
||||
case 5: VLD1_lane(I_16, neonScratchReg, tempReg1, 0, true); break;
|
||||
case 6: VLD1_lane(I_32, neonScratchReg, tempReg1, 0, false); break;
|
||||
case 7:
|
||||
case 8:
|
||||
|
Loading…
Reference in New Issue
Block a user