mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-23 05:19:56 +00:00
Some more armjit work (ext, ins) and VFPU prefix clamps (not enabled)
This commit is contained in:
parent
3c640a0f1e
commit
650c02c3a5
@ -83,6 +83,14 @@ bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated)
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that loads the float constant `val` into `dest`.
//   dest    - destination register handed to VMOV (callers in this change pass
//             single-precision registers such as S0 or fpr.V(...)).
//   val     - the constant; its raw 32-bit pattern is what gets loaded.
//   tempReg - integer scratch register; it is overwritten with the bit pattern.
void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg)
{
	// Reinterpret the float as its raw 32-bit representation.
	union {float f; u32 u;} conv;
	conv.f = val;
	MOVI2R(tempReg, conv.u);
	// Transfer from the register that was just loaded. The previous code read from a
	// hard-coded R0, which is only correct when the caller happens to pass R0 as tempReg.
	VMOV(dest, tempReg);
}
|
||||
|
||||
void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)
|
||||
{
|
||||
Operand2 op2;
|
||||
@ -501,10 +509,23 @@ void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
|
||||
Write4OpMultiply(0xE, destLo, destHi, rn, rm);
|
||||
}
|
||||
|
||||
// Emits a UBFX (unsigned bit field extract) instruction under the current condition.
//   dest  - destination register (encoded at bits 12+).
//   rn    - source register (encoded in the low bits).
//   lsb   - lowest bit of the field (encoded at bits 7+).
//   width - field width; the instruction stores width - 1 (encoded at bits 16+).
void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)
{
	const auto opcode       = condition | (0x7E0 << 16);
	const auto widthMinus1  = (width - 1) << 16;
	const auto destField    = dest << 12;
	const auto lsbField     = lsb << 7;
	const auto fixedBits    = 5 << 4;

	Write32(opcode | widthMinus1 | destField | lsbField | fixedBits | rn);
}
|
||||
|
||||
// Emits a BFI (bit field insert) instruction under the current condition.
//   rd    - register receiving the inserted field (encoded at bits 12+).
//   rn    - register supplying the bits (encoded in the low bits).
//   lsb   - lowest bit of the destination field (encoded at bits 7+).
//   width - field width; the encoding stores the field's top bit (msb) instead.
void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width)
{
	// Top bit of the field, clamped so it never exceeds bit 31.
	const u32 topBit = (lsb + width - 1);
	const u32 msb = (topBit > 31) ? 31 : topBit;

	const auto opcode    = condition | (0x7C0 << 16);
	const auto msbField  = msb << 16;
	const auto rdField   = rd << 12;
	const auto lsbField  = lsb << 7;
	const auto fixedBits = 1 << 4;

	Write32(opcode | msbField | rdField | lsbField | fixedBits | rn);
}
|
||||
|
||||
// Emits an SXTB (sign-extend byte) instruction under the current condition:
// `dest` is encoded at bits 12+ and the source register `op2` in the low bits.
void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)
{
	const auto opcode    = condition | (0x6AF << 16);
	const auto destField = dest << 12;
	const auto fixedBits = 7 << 4;

	Write32(opcode | destField | fixedBits | op2);
}
|
||||
|
||||
void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation)
|
||||
{
|
||||
SXTAH(dest, (ARMReg)15, op2, rotation);
|
||||
|
@ -469,6 +469,9 @@ public:
|
||||
void SXTB(ARMReg dest, ARMReg op2);
|
||||
void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
|
||||
void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
|
||||
// Bit field insert. The field is described by its lowest bit (lsb) and width;
// the emitter converts that to the top-bit form the encoding uses, clamped to bit 31.
void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
|
||||
// Unsigned bit field extract. The field is described by its lowest bit (lsb) and width;
// width - 1 is what gets encoded, so width is expected to be at least 1.
void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
|
||||
|
||||
// Using just MSR here messes with our defines on the PPC side of stuff (when this code was in dolphin...)
|
||||
// Just need to put an underscore here, bit annoying.
|
||||
void _MSR (bool nzcvq, bool g, Operand2 op2);
|
||||
@ -542,6 +545,7 @@ public:
|
||||
|
||||
// Wrapper around MOVT/MOVW with fallbacks.
|
||||
void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
|
||||
// Loads the float constant val into dest. tempReg is an integer scratch register
// that is overwritten with the constant's raw 32-bit pattern (via MOVI2R).
void MOVI2F(ARMReg dest, float val, ARMReg tempReg);
|
||||
}; // class ARMXEmitter
|
||||
|
||||
|
||||
|
@ -25,6 +25,10 @@
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable:4100)
|
||||
#endif
|
||||
|
||||
#if defined(ARM)
|
||||
#define _M_ARM32
|
||||
#endif
|
||||
@ -77,7 +81,9 @@ private:
|
||||
#error needs at least version 1000 of MSC
|
||||
#endif
|
||||
|
||||
#ifndef NOMINMAX
|
||||
#define NOMINMAX
|
||||
#endif
|
||||
|
||||
// Memory leak checks
|
||||
#define CHECK_HEAP_INTEGRITY()
|
||||
|
@ -16,6 +16,7 @@
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include "ArmJit.h"
|
||||
#include "Common/CPUDetect.h"
|
||||
|
||||
using namespace MIPSAnalyst;
|
||||
#define _RS ((op>>21) & 0x1F)
|
||||
@ -312,10 +313,15 @@ namespace MIPSComp
|
||||
void Jit::Comp_Special3(u32 op)
|
||||
{
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
if (!cpu_info.bArmV7) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
int rs = _RS;
|
||||
int rt = _RT;
|
||||
int pos = _POS;
|
||||
|
||||
int pos = _POS;
|
||||
int size = _SIZE + 1;
|
||||
u32 mask = 0xFFFFFFFFUL >> (32 - size);
|
||||
|
||||
@ -331,12 +337,23 @@ namespace MIPSComp
|
||||
gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);
|
||||
return;
|
||||
}
|
||||
DISABLE;
|
||||
// TODO. There's an NEON ARM instruction for this, VEXT - worth using?
|
||||
// TODO: Make fallback for when UBFX isn't available
|
||||
gpr.MapDirtyIn(rt, rs, false);
|
||||
UBFX(gpr.R(rt), gpr.R(rs), pos, size);
|
||||
break;
|
||||
|
||||
case 0x4: //ins
|
||||
DISABLE;
|
||||
if (gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
{
|
||||
u32 sourcemask = mask >> pos;
|
||||
u32 destmask = ~(sourcemask << pos);
|
||||
u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;
|
||||
gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);
|
||||
return;
|
||||
}
|
||||
gpr.MapDirtyIn(rt, rs, false);
|
||||
// TODO: Make fallback for when BFI isn't available
|
||||
BFI(gpr.R(rt), gpr.R(rs), pos, size);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -126,8 +126,8 @@ namespace MIPSComp
|
||||
VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
|
||||
}
|
||||
} else {
|
||||
MOVI2R(R0, (u32)(constantArray[regnum + (abs<<2)]));
|
||||
VMOV(fpr.V(vregs[i]), R0);
|
||||
// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
|
||||
MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0);
|
||||
}
|
||||
|
||||
// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
|
||||
@ -147,8 +147,7 @@ namespace MIPSComp
|
||||
return;
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
for (int i = 0; i < n; i++) {
|
||||
// Hopefully this is rare, we'll just write it into a reg we drop.
|
||||
if (js.VfpuWriteMask(i))
|
||||
regs[i] = fpr.GetTempV();
|
||||
@ -160,26 +159,45 @@ namespace MIPSComp
|
||||
if (!js.prefixD) return;
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 0; i < n; i++)
|
||||
{
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (js.VfpuWriteMask(i))
|
||||
continue;
|
||||
|
||||
int sat = (js.prefixD >> (i * 2)) & 3;
|
||||
if (sat == 1)
|
||||
{
|
||||
if (sat == 1) {
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||
// ARGH this is a pain - no MIN/MAX in non-NEON VFP!
|
||||
// TODO
|
||||
|
||||
//MAXSS(fpr.VX(vregs[i]), M((void *)&zero));
|
||||
//MINSS(fpr.VX(vregs[i]), M((void *)&one));
|
||||
}
|
||||
else if (sat == 3)
|
||||
{
|
||||
// NEON does have min/max though so this should only be a fallback.
|
||||
MOVI2F(S0, 0.0, R0);
|
||||
MOVI2F(S1, 1.0, R0);
|
||||
VCMP(fpr.V(vregs[i]), S1);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_GE);
|
||||
VMOV(fpr.V(vregs[i]), S1);
|
||||
FixupBranch skip = B();
|
||||
SetCC(CC_AL);
|
||||
VCMP(fpr.V(vregs[i]), S0);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_LE);
|
||||
VMOV(fpr.V(vregs[i]), S0);
|
||||
SetCC(CC_AL);
|
||||
SetJumpTarget(skip);
|
||||
} else if (sat == 3) {
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY);
|
||||
//MAXSS(fpr.VX(vregs[i]), M((void *)&minus_one));
|
||||
//MINSS(fpr.VX(vregs[i]), M((void *)&one));
|
||||
MOVI2F(S0, -1.0, R0);
|
||||
MOVI2F(S1, 1.0, R0);
|
||||
VCMP(fpr.V(vregs[i]), S1);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_GE);
|
||||
VMOV(fpr.V(vregs[i]), S1);
|
||||
FixupBranch skip = B();
|
||||
SetCC(CC_AL);
|
||||
VCMP(fpr.V(vregs[i]), S0);
|
||||
VMRS_APSR();
|
||||
SetCC(CC_LE);
|
||||
VMOV(fpr.V(vregs[i]), S0);
|
||||
SetCC(CC_AL);
|
||||
SetJumpTarget(skip);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -299,8 +317,7 @@ namespace MIPSComp
|
||||
VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0]));
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 1; i < n; i++)
|
||||
{
|
||||
for (int i = 1; i < n; i++) {
|
||||
// sum += s[i]*t[i];
|
||||
VMUL(S1, fpr.V(sregs[i]), fpr.V(tregs[i]));
|
||||
VADD(S0, S0, S1);
|
||||
@ -506,7 +523,6 @@ namespace MIPSComp
|
||||
|
||||
void Jit::Comp_Mftv(u32 op)
|
||||
{
|
||||
// DISABLE;
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
int imm = op & 0xFF;
|
||||
|
@ -235,8 +235,7 @@ private:
|
||||
}
|
||||
void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
|
||||
|
||||
|
||||
/*
|
||||
/*
|
||||
void CompImmLogic(u32 op, void (ARMXEmitter::*arith)(int, const OpArg &, const OpArg &));
|
||||
void CompTriArith(u32 op, void (ARMXEmitter::*arith)(int, const OpArg &, const OpArg &));
|
||||
void CompShiftImm(u32 op, void (ARMXEmitter::*shift)(int, OpArg, OpArg));
|
||||
|
@ -1605,11 +1605,11 @@ namespace MIPSInt
|
||||
// negative NAN seems different? TODO
|
||||
switch ((op >> 23) & 3) {
|
||||
case 2: // vmin
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++)
|
||||
for (int i = 0; i < numElements; i++)
|
||||
d[i] = isnan(t[i]) ? s[i] : (isnan(s[i]) ? t[i] : std::min(s[i], t[i]));
|
||||
break;
|
||||
case 3: // vmax
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++)
|
||||
for (int i = 0; i < numElements; i++)
|
||||
d[i] = isnan(t[i]) ? t[i] : (isnan(s[i]) ? s[i] : std::max(s[i], t[i]));
|
||||
break;
|
||||
default:
|
||||
|
Loading…
Reference in New Issue
Block a user