mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-03 07:11:56 +00:00
ARM: Fix vsgn. Some vertex decoder tweaks.
This commit is contained in:
parent
030e6460cc
commit
e5e23f3ce1
@ -1824,8 +1824,6 @@ namespace MIPSComp
|
||||
}
|
||||
|
||||
void Jit::Comp_Vsgn(MIPSOpcode op) {
|
||||
DISABLE; // Breaks Miami Vice
|
||||
|
||||
NEON_IF_AVAILABLE(CompNEON_Vsgn);
|
||||
CONDITIONAL_DISABLE;
|
||||
if (js.HasUnknownPrefix()) {
|
||||
@ -1855,12 +1853,13 @@ namespace MIPSComp
|
||||
// care of NaNs like the interpreter (ignores them and just operates on the bits).
|
||||
MOVI2F(S0, 0.0f, R0);
|
||||
VCMP(fpr.V(sregs[i]), S0);
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
VMOV(R0, fpr.V(sregs[i]));
|
||||
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
|
||||
SetCC(CC_NEQ);
|
||||
AND(R0, R0, AssumeMakeOperand2(0x80000000));
|
||||
ORR(R0, R0, AssumeMakeOperand2(0x3F800000));
|
||||
SetCC(CC_EQ);
|
||||
MOV(R1, AssumeMakeOperand2(0x0));
|
||||
MOV(R0, AssumeMakeOperand2(0x0));
|
||||
SetCC(CC_AL);
|
||||
VMOV(fpr.V(tempregs[i]), R0);
|
||||
}
|
||||
|
@ -28,7 +28,7 @@ bool NEONSkinning = false;
|
||||
static float MEMORY_ALIGNED16(skinMatrix[12]);
|
||||
|
||||
// Will be used only in NEON mode.
|
||||
static float MEMORY_ALIGNED16(bones[16 * 8]); // First two will be kept in registers later
|
||||
static float MEMORY_ALIGNED16(bones[16 * 8]); // First two are kept in registers
|
||||
|
||||
// NEON register allocation:
|
||||
// Q0: Texture scaling parameters
|
||||
@ -315,7 +315,9 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
|
||||
if (NEONSkinning) {
|
||||
// We construct a matrix in Q4-Q7
|
||||
// We can use Q1 as temp.
|
||||
MOVP2R(scratchReg, bones);
|
||||
if (dec_->nweights >= 2) {
|
||||
MOVP2R(scratchReg, bones + 16 * 2);
|
||||
}
|
||||
for (int i = 0; i < dec_->nweights; i++) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
@ -323,26 +325,36 @@ void VertexDecoderJitCache::Jit_ApplyWeights() {
|
||||
VMUL_scalar(F_32, Q5, Q9, QScalar(neonWeightRegs[0], 0));
|
||||
VMUL_scalar(F_32, Q6, Q10, QScalar(neonWeightRegs[0], 0));
|
||||
VMUL_scalar(F_32, Q7, Q11, QScalar(neonWeightRegs[0], 0));
|
||||
ADD(scratchReg, scratchReg, 16 * 4);
|
||||
break;
|
||||
case 1:
|
||||
VMLA_scalar(F_32, Q4, Q12, QScalar(neonWeightRegs[0], 1));
|
||||
VMLA_scalar(F_32, Q5, Q13, QScalar(neonWeightRegs[0], 1));
|
||||
VMLA_scalar(F_32, Q6, Q14, QScalar(neonWeightRegs[0], 1));
|
||||
VMLA_scalar(F_32, Q7, Q15, QScalar(neonWeightRegs[0], 1));
|
||||
ADD(scratchReg, scratchReg, 16 * 4);
|
||||
break;
|
||||
default:
|
||||
// Matrices 2+ need to be loaded from memory.
|
||||
// Wonder if we can free up one more register so we could get some parallelism.
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q4, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q5, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q6, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q7, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
// Actually Q3 is free if there are fewer than 5 weights...
|
||||
if (dec_->nweights <= 4) {
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VLD1(F_32, Q3, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q4, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VMLA_scalar(F_32, Q5, Q3, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VLD1(F_32, Q3, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q6, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VMLA_scalar(F_32, Q7, Q3, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
} else {
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q4, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q5, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q6, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
VLD1(F_32, Q1, scratchReg, 2, ALIGN_128, REG_UPDATE);
|
||||
VMLA_scalar(F_32, Q7, Q1, QScalar(neonWeightRegs[i >> 2], i & 3));
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user