Some more armjit work (ext, ins) and VFPU prefix clamps (not enabled)

This commit is contained in:
Henrik Rydgard 2013-03-03 16:40:58 +01:00
parent 3c640a0f1e
commit 650c02c3a5
7 changed files with 91 additions and 28 deletions

View File

@ -83,6 +83,14 @@ bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated)
}
}
// Emits code that loads the float constant `val` into the VFP register `dest`.
//
// The bit pattern of `val` is first materialized in the integer register
// `tempReg` via MOVI2R and then transferred to `dest` with VMOV, so `tempReg`
// is overwritten by the emitted code.
void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg)
{
	// Reinterpret the float as its raw 32-bit pattern.
	union {float f; u32 u;} conv;
	conv.f = val;
	MOVI2R(tempReg, conv.u);
	// Move from the register that was just loaded. Using a hard-coded R0 here
	// would only work when the caller happened to pass R0 as tempReg.
	VMOV(dest, tempReg);
}
void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)
{
Operand2 op2;
@ -501,10 +509,23 @@ void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
Write4OpMultiply(0xE, destLo, destHi, rn, rm);
}
// Emits a UBFX instruction word.
//
// Field placement used below:
//   bits 27..21  fixed opcode pattern (0x7E0 << 16)
//   bits 20..16  width - 1
//   bits 15..12  dest
//   bits 11..7   lsb
//   bits  6..4   fixed pattern 0b101
//   bits  3..0   rn
void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)
{
	const u32 opcodeBits = (0x7E0 << 16) | (5 << 4);
	const u32 widthField = (width - 1) << 16;
	const u32 lsbField = lsb << 7;
	const u32 destField = dest << 12;

	Write32(condition | opcodeBits | widthField | destField | lsbField | rn);
}
// Emits a BFI instruction word.
//
// The instruction encodes the most significant bit of the field rather than
// its width, so it is derived from lsb + width - 1 and capped at bit 31.
//
// Field placement used below:
//   bits 27..21  fixed opcode pattern (0x7C0 << 16)
//   bits 20..16  msb
//   bits 15..12  rd
//   bits 11..7   lsb
//   bits  6..4   fixed pattern 0b001
//   bits  3..0   rn
void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width)
{
	const u32 topBit = (lsb + width - 1);
	const u32 msb = (topBit > 31) ? 31 : topBit;

	const u32 opcodeBits = (0x7C0 << 16) | (1 << 4);
	const u32 msbField = msb << 16;
	const u32 lsbField = lsb << 7;
	const u32 rdField = rd << 12;

	Write32(condition | opcodeBits | msbField | rdField | lsbField | rn);
}
// Emits an SXTB instruction word: fixed opcode pattern 0x6AF in bits 27..16,
// dest in bits 15..12, fixed pattern 0b0111 in bits 7..4 and op2 in bits 3..0.
void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)
{
	const u32 opcodeBits = (0x6AF << 16) | (7 << 4);
	const u32 destField = dest << 12;

	Write32(condition | opcodeBits | destField | op2);
}
void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation)
{
SXTAH(dest, (ARMReg)15, op2, rotation);

View File

@ -469,6 +469,9 @@ public:
void SXTB(ARMReg dest, ARMReg op2);
void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
// Using just MSR here messes with our defines on the PPC side of stuff (when this code was in dolphin...)
// Just need to put an underscore here, bit annoying.
void _MSR (bool nzcvq, bool g, Operand2 op2);
@ -542,6 +545,7 @@ public:
// Wrapper around MOVT/MOVW with fallbacks.
void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
void MOVI2F(ARMReg dest, float val, ARMReg tempReg);
}; // class ARMXEmitter

View File

@ -25,6 +25,10 @@
#include <stdio.h>
#include <string.h>
#ifdef _MSC_VER
#pragma warning (disable:4100)
#endif
#if defined(ARM)
#define _M_ARM32
#endif
@ -77,7 +81,9 @@ private:
#error needs at least version 1000 of MSC
#endif
#ifndef NOMINMAX
#define NOMINMAX
#endif
// Memory leak checks
#define CHECK_HEAP_INTEGRITY()

View File

@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "ArmJit.h"
#include "Common/CPUDetect.h"
using namespace MIPSAnalyst;
#define _RS ((op>>21) & 0x1F)
@ -312,10 +313,15 @@ namespace MIPSComp
void Jit::Comp_Special3(u32 op)
{
CONDITIONAL_DISABLE;
if (!cpu_info.bArmV7) {
DISABLE;
}
int rs = _RS;
int rt = _RT;
int pos = _POS;
int pos = _POS;
int size = _SIZE + 1;
u32 mask = 0xFFFFFFFFUL >> (32 - size);
@ -331,12 +337,23 @@ namespace MIPSComp
gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);
return;
}
DISABLE;
// TODO. There's an NEON ARM instruction for this, VEXT - worth using?
// TODO: Make fallback for when UBFX isn't available
gpr.MapDirtyIn(rt, rs, false);
UBFX(gpr.R(rt), gpr.R(rs), pos, size);
break;
case 0x4: //ins
DISABLE;
if (gpr.IsImm(rs) && gpr.IsImm(rt))
{
u32 sourcemask = mask >> pos;
u32 destmask = ~(sourcemask << pos);
u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;
gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);
return;
}
gpr.MapDirtyIn(rt, rs, false);
// TODO: Make fallback for when BFI isn't available
BFI(gpr.R(rt), gpr.R(rs), pos, size);
break;
}
}

View File

@ -126,8 +126,8 @@ namespace MIPSComp
VMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
}
} else {
MOVI2R(R0, (u32)(constantArray[regnum + (abs<<2)]));
VMOV(fpr.V(vregs[i]), R0);
// TODO: There is VMOV s, imm on ARM, that can generate some of these constants. Not 1/3 or 1/6 though.
MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs<<2)], R0);
}
// TODO: This can be integrated into the VABS / VMOV above, and also the constants.
@ -147,8 +147,7 @@ namespace MIPSComp
return;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++)
{
for (int i = 0; i < n; i++) {
// Hopefully this is rare, we'll just write it into a reg we drop.
if (js.VfpuWriteMask(i))
regs[i] = fpr.GetTempV();
@ -160,26 +159,45 @@ namespace MIPSComp
if (!js.prefixD) return;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++)
{
for (int i = 0; i < n; i++) {
if (js.VfpuWriteMask(i))
continue;
int sat = (js.prefixD >> (i * 2)) & 3;
if (sat == 1)
{
if (sat == 1) {
fpr.MapRegV(vregs[i], MAP_DIRTY);
// ARGH this is a pain - no MIN/MAX in non-NEON VFP!
// TODO
//MAXSS(fpr.VX(vregs[i]), M((void *)&zero));
//MINSS(fpr.VX(vregs[i]), M((void *)&one));
}
else if (sat == 3)
{
// NEON does have min/max though so this should only be a fallback.
MOVI2F(S0, 0.0, R0);
MOVI2F(S1, 1.0, R0);
VCMP(fpr.V(vregs[i]), S1);
VMRS_APSR();
SetCC(CC_GE);
VMOV(fpr.V(vregs[i]), S1);
FixupBranch skip = B();
SetCC(CC_AL);
VCMP(fpr.V(vregs[i]), S0);
VMRS_APSR();
SetCC(CC_LE);
VMOV(fpr.V(vregs[i]), S0);
SetCC(CC_AL);
SetJumpTarget(skip);
} else if (sat == 3) {
fpr.MapRegV(vregs[i], MAP_DIRTY);
//MAXSS(fpr.VX(vregs[i]), M((void *)&minus_one));
//MINSS(fpr.VX(vregs[i]), M((void *)&one));
MOVI2F(S0, -1.0, R0);
MOVI2F(S1, 1.0, R0);
VCMP(fpr.V(vregs[i]), S1);
VMRS_APSR();
SetCC(CC_GE);
VMOV(fpr.V(vregs[i]), S1);
FixupBranch skip = B();
SetCC(CC_AL);
VCMP(fpr.V(vregs[i]), S0);
VMRS_APSR();
SetCC(CC_LE);
VMOV(fpr.V(vregs[i]), S0);
SetCC(CC_AL);
SetJumpTarget(skip);
}
}
}
@ -299,8 +317,7 @@ namespace MIPSComp
VMUL(S0, fpr.V(sregs[0]), fpr.V(tregs[0]));
int n = GetNumVectorElements(sz);
for (int i = 1; i < n; i++)
{
for (int i = 1; i < n; i++) {
// sum += s[i]*t[i];
VMUL(S1, fpr.V(sregs[i]), fpr.V(tregs[i]));
VADD(S0, S0, S1);
@ -506,7 +523,6 @@ namespace MIPSComp
void Jit::Comp_Mftv(u32 op)
{
// DISABLE;
CONDITIONAL_DISABLE;
int imm = op & 0xFF;

View File

@ -235,8 +235,7 @@ private:
}
void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
/*
/*
void CompImmLogic(u32 op, void (ARMXEmitter::*arith)(int, const OpArg &, const OpArg &));
void CompTriArith(u32 op, void (ARMXEmitter::*arith)(int, const OpArg &, const OpArg &));
void CompShiftImm(u32 op, void (ARMXEmitter::*shift)(int, OpArg, OpArg));

View File

@ -1605,11 +1605,11 @@ namespace MIPSInt
// negative NAN seems different? TODO
switch ((op >> 23) & 3) {
case 2: // vmin
for (int i = 0; i < GetNumVectorElements(sz); i++)
for (int i = 0; i < numElements; i++)
d[i] = isnan(t[i]) ? s[i] : (isnan(s[i]) ? t[i] : std::min(s[i], t[i]));
break;
case 3: // vmax
for (int i = 0; i < GetNumVectorElements(sz); i++)
for (int i = 0; i < numElements; i++)
d[i] = isnan(t[i]) ? t[i] : (isnan(s[i]) ? s[i] : std::max(s[i], t[i]));
break;
default: