mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-02-26 08:55:58 +00:00
Merge pull request #2093 from xsacha/armjit-vfpu
Armjit: Improve ApplyPrefixD. Add VABD to emitter.
This commit is contained in:
commit
7574ebbe58
@ -850,6 +850,21 @@ ARMReg ARMXEmitter::SubBase(ARMReg Reg)
|
||||
}
|
||||
|
||||
// NEON Specific
|
||||
// Emits the NEON VABD (absolute difference) instruction: Vd = |Vn - Vm|.
//
// Size is written verbatim into instruction bits 21:20. With the 0xF3 prefix
// and opcode 0xD used below, the ARMv7 encoding is the floating-point form of
// VABD, in which bits 21:20 must be 0b10 for F32.
// NOTE(review): presumably callers pass a Size whose numeric value is 2 -
// confirm against the IntegerSize enum.
//
// Vd selects the operation width: a register >= Q0 sets the Q bit (quad
// operation), otherwise the double-register form is emitted. Requires NEON.
void ARMXEmitter::VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VABD(float)");
	_assert_msg_(DYNA_REC, cpu_info.bNEON, "Can't use VABD(float) when CPU doesn't support it");
	bool register_quad = Vd >= Q0;

	// Gets encoded as a double register
	Vd = SubBase(Vd);
	Vn = SubBase(Vn);
	Vm = SubBase(Vm);

	// "Three registers of the same length" layout:
	//   D (bit 22), size (21:20), Vn (19:16), Vd (15:12), opcode (11:8),
	//   N (bit 7), Q (bit 6), M (bit 5), Vm (3:0).
	// The high bit of Vm (0x10) has to be shifted by 1 so it lands in M
	// (bit 5). Shifting it by 2 would drop it into the Q bit, which both
	// selects the wrong source register for D16-D31 and forces a quad op.
	Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16)
		| ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) | (register_quad << 6)
		| ((Vm & 0x10) << 1) | (Vm & 0xF));
}
|
||||
void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
{
|
||||
_assert_msg_(DYNA_REC, Vd >= D0, "Pass invalid register to VADD(integer)");
|
||||
@ -864,7 +879,7 @@ void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
|
||||
Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
|
||||
| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \
|
||||
| ((Vm & 0x10) << 2) | (Vm & 0xF));
|
||||
| ((Vm & 0x10) << 2) | (Vm & 0xF));
|
||||
|
||||
}
|
||||
void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
@ -879,7 +894,7 @@ void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
|
||||
|
||||
Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
|
||||
| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \
|
||||
| ((Vm & 0x10) << 2) | (Vm & 0xF));
|
||||
| ((Vm & 0x10) << 2) | (Vm & 0xF));
|
||||
}
|
||||
|
||||
// VFP Specific
|
||||
|
@ -530,6 +530,7 @@ public:
|
||||
// Subtracts the base from the register to give us the real one
|
||||
ARMReg SubBase(ARMReg Reg);
|
||||
// NEON Only
|
||||
void VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
void VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
|
||||
|
||||
|
@ -17,6 +17,7 @@
|
||||
|
||||
#include "../../MemMap.h"
|
||||
#include "../MIPSAnalyst.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
#include "Core/Config.h"
|
||||
#include "Core/Reporting.h"
|
||||
|
||||
@ -166,39 +167,22 @@ namespace MIPSComp
|
||||
|
||||
int sat = (js.prefixD >> (i * 2)) & 3;
|
||||
if (sat == 1) {
|
||||
// clamped = fabs(x) - fabs(x-0.5f) + 0.5f; // [ 0, 1]
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||
// ARGH this is a pain - no MIN/MAX in non-NEON VFP!
|
||||
// NEON does have min/max though so this should only be a fallback.
|
||||
MOVI2F(S0, 0.0, R0);
|
||||
MOVI2F(S1, 1.0, R0);
|
||||
VCMP(fpr.V(vregs[i]), S1);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_GE);
|
||||
VMOV(fpr.V(vregs[i]), S1);
|
||||
FixupBranch skip = B();
|
||||
SetCC(CC_AL);
|
||||
VCMP(fpr.V(vregs[i]), S0);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_LE);
|
||||
VMOV(fpr.V(vregs[i]), S0);
|
||||
SetCC(CC_AL);
|
||||
SetJumpTarget(skip);
|
||||
MOVI2F(S0, 0.5, R0);
|
||||
VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x)
|
||||
VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-0.5f) {VABD}
|
||||
VABS(S2, S2);
|
||||
VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2 + 0.5f
|
||||
VADD(fpr.V(vregs[i]), fpr.V(vregs[i]), S0);
|
||||
} else if (sat == 3) {
|
||||
// clamped = fabs(x) - fabs(x-1.0f); // [-1, 1]
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||
MOVI2F(S0, -1.0, R0);
|
||||
MOVI2F(S1, 1.0, R0);
|
||||
VCMP(fpr.V(vregs[i]), S1);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_GE);
|
||||
VMOV(fpr.V(vregs[i]), S1);
|
||||
FixupBranch skip = B();
|
||||
SetCC(CC_AL);
|
||||
VCMP(fpr.V(vregs[i]), S0);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_LE);
|
||||
VMOV(fpr.V(vregs[i]), S0);
|
||||
SetCC(CC_AL);
|
||||
SetJumpTarget(skip);
|
||||
MOVI2F(S0, 1.0, R0);
|
||||
VABS(S1, fpr.V(vregs[i])); // S1 = fabs(x)
|
||||
VSUB(S2, fpr.V(vregs[i]), S0); // S2 = fabs(x-1.0f) {VABD}
|
||||
VABS(S2, S2);
|
||||
VSUB(fpr.V(vregs[i]), S1, S2); // v[i] = S1 - S2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -115,9 +115,9 @@ return floor(x+.5);
|
||||
|
||||
void ApplyPrefixST(float *v, u32 data, VectorSize size)
|
||||
{
|
||||
// Possible optimization shortcut:
|
||||
if (data == 0xe4)
|
||||
return;
|
||||
// Possible optimization shortcut:
|
||||
if (data == 0xe4)
|
||||
return;
|
||||
|
||||
int n = GetNumVectorElements(size);
|
||||
float origV[4];
|
||||
|
Loading…
x
Reference in New Issue
Block a user