x86 Jit: Basic implementation of vbfy1/2 (mostly to just cross another one off the list..)

This commit is contained in:
Henrik Rydgard 2014-12-03 23:18:53 +01:00
parent 7711315804
commit e3a81f4346
11 changed files with 94 additions and 7 deletions

View File

@ -2115,4 +2115,8 @@ namespace MIPSComp
DISABLE;
}
// Compile hook for vbfy1/vbfy2 in this backend. No native code is generated
// here: the body consists solely of the DISABLE macro.
// NOTE(review): DISABLE presumably hands the op to the generic fallback path -
// confirm against the macro definition in this file.
void Jit::Comp_Vbfy(MIPSOpcode op) {
DISABLE;
}
}

View File

@ -181,5 +181,9 @@ void Jit::CompNEON_ColorConv(MIPSOpcode op) {
DISABLE;
}
// NEON-variant compile hook for vbfy1/vbfy2. Not implemented: the body
// consists solely of the DISABLE macro.
// NOTE(review): DISABLE presumably hands the op to the generic fallback path -
// confirm against the macro definition in this file.
void Jit::CompNEON_Vbfy(MIPSOpcode op) {
DISABLE;
}
}
// namespace MIPSComp

View File

@ -139,6 +139,7 @@ public:
void Comp_Vsgn(MIPSOpcode op);
void Comp_Vocp(MIPSOpcode op);
void Comp_ColorConv(MIPSOpcode op);
// Compile handler for the VFPU vbfy1/vbfy2 instructions (the instruction
// table dispatches both opcodes to Comp_Vbfy).
void Comp_Vbfy(MIPSOpcode op);
// Non-NEON: VPFX
@ -178,6 +179,7 @@ public:
void CompNEON_Vsgn(MIPSOpcode op);
void CompNEON_Vocp(MIPSOpcode op);
void CompNEON_ColorConv(MIPSOpcode op);
// NEON-variant handler for vbfy1/vbfy2.
// NOTE(review): presumably reached from Comp_Vbfy when the NEON path is
// enabled - confirm against the dispatching code.
void CompNEON_Vbfy(MIPSOpcode op);
int Replace_fabsf();

View File

@ -1044,10 +1044,11 @@ namespace MIPSInt
}
else
{
for (int i = 0; i < n; i+=2)
{
d[i] = s[i] + s[i+1];
d[i+1] = s[i] - s[i+1];
d[0] = s[0] + s[1];
d[1] = s[0] - s[1];
if (n == 4) {
d[2] = s[2] + s[3];
d[3] = s[2] - s[3];
}
}
ApplyPrefixD(d, sz);

View File

@ -738,8 +738,8 @@ const MIPSInstruction tableVFPU9[32] = // 110100 00010 xxxxx . ....... . .......
INSTR("vsrt1", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt1, IN_OTHER|OUT_OTHER|IS_VFPU),
// TODO: Flags may not be correct (prefixes, etc.)
INSTR("vsrt2", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsrt2, IN_OTHER|OUT_OTHER|IS_VFPU),
INSTR("vbfy1", &Jit::Comp_Generic, Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vbfy2", &Jit::Comp_Generic, Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vbfy1", &Jit::Comp_Vbfy, Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
INSTR("vbfy2", &Jit::Comp_Vbfy, Dis_Vbfy, Int_Vbfy, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),
//4
INSTR("vocp", &Jit::Comp_Vocp, Dis_Vbfy, Int_Vocp, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX), // one's complement
INSTR("vsocp", &Jit::Comp_Generic, Dis_Vbfy, Int_Vsocp, IN_OTHER|OUT_OTHER|IS_VFPU|OUT_EAT_PREFIX),

View File

@ -1212,5 +1212,8 @@ namespace MIPSComp
void Jit::Comp_ColorConv(MIPSOpcode op) {
DISABLE;
}
// Compile hook for vbfy1/vbfy2 in this backend. No native code is generated
// here: the body consists solely of the DISABLE macro.
// NOTE(review): DISABLE presumably hands the op to the generic fallback path -
// confirm against the macro definition in this file.
void Jit::Comp_Vbfy(MIPSOpcode op) {
DISABLE;
}
}

View File

@ -241,6 +241,8 @@ namespace MIPSComp
void Comp_VCrossQuat(MIPSOpcode op);
void Comp_Vsgn(MIPSOpcode op);
void Comp_Vocp(MIPSOpcode op);
void Comp_ColorConv(MIPSOpcode op);
// Compile handler for the VFPU vbfy1/vbfy2 instructions (the instruction
// table dispatches both opcodes to Comp_Vbfy).
void Comp_Vbfy(MIPSOpcode op);
int Replace_fabsf();

View File

@ -1937,6 +1937,74 @@ void Jit::Comp_Vocp(MIPSOpcode op) {
fpr.ReleaseSpillLocks();
}
// Emits scalar SSE code for the VFPU instructions vbfy1 and vbfy2.
//
// With s = source lanes and d = destination lanes, the generated code computes:
//   vbfy1 (subop 2): d[0] = s[0] + s[1];  d[1] = s[0] - s[1];
//                    and for n == 4 also
//                    d[2] = s[2] + s[3];  d[3] = s[2] - s[3];
//   vbfy2 (subop 3): d[0] = s[0] + s[2];  d[1] = s[1] + s[3];
//                    d[2] = s[0] - s[2];  d[3] = s[1] - s[3];
//
// vbfy2 reads and writes all four lanes, so it is only compiled when n == 4.
// Unknown prefixes, unsupported vector sizes and unsupported subops bail out
// through DISABLE before any register is mapped or spill-locked.
void Jit::Comp_Vbfy(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);
	if (n != 2 && n != 4) {
		DISABLE;
	}

	// Decode and validate the sub-opcode up front, so that every bail-out
	// happens while the register cache is still untouched.
	const int subop = (op >> 16) & 0x1F;
	const bool isVbfy1 = (subop == 2);
	const bool isVbfy2 = (subop == 3);
	if (!isVbfy1 && !isVbfy2) {
		DISABLE;
	}
	if (isVbfy2 && n != 4) {
		// Only n entries of sregs/dregs/tempxregs get filled in below; the
		// vbfy2 sequence needs lanes 2 and 3, which do not exist for a pair.
		DISABLE;
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);
	// Flush SIMD.
	fpr.SimpleRegsV(sregs, sz, 0);
	fpr.SimpleRegsV(dregs, sz, MAP_NOINIT | MAP_DIRTY);

	// Choose one XMM register per output lane: a fresh temp when the
	// destination is not overlap-safe against the sources, otherwise the
	// destination's own register.
	X64Reg tempxregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
			int reg = fpr.GetTempV();
			fpr.MapRegV(reg, MAP_NOINIT | MAP_DIRTY);
			fpr.SpillLockV(reg);
			tempxregs[i] = fpr.VX(reg);
		} else {
			fpr.MapRegV(dregs[i], dregs[i] == sregs[i] ? MAP_DIRTY : MAP_NOINIT);
			fpr.SpillLockV(dregs[i]);
			tempxregs[i] = fpr.VX(dregs[i]);
		}
	}

	if (isVbfy2) {
		// vbfy2 (n == 4 guaranteed by the check above).
		MOVSS(tempxregs[0], fpr.V(sregs[0]));
		MOVSS(tempxregs[1], fpr.V(sregs[1]));
		MOVSS(tempxregs[2], fpr.V(sregs[0]));
		MOVSS(tempxregs[3], fpr.V(sregs[1]));
		ADDSS(tempxregs[0], fpr.V(sregs[2]));
		ADDSS(tempxregs[1], fpr.V(sregs[3]));
		SUBSS(tempxregs[2], fpr.V(sregs[2]));
		SUBSS(tempxregs[3], fpr.V(sregs[3]));
	} else {
		// vbfy1: sum/difference of each adjacent pair of lanes.
		MOVSS(tempxregs[0], fpr.V(sregs[0]));
		MOVSS(tempxregs[1], fpr.V(sregs[0]));
		ADDSS(tempxregs[0], fpr.V(sregs[1]));
		SUBSS(tempxregs[1], fpr.V(sregs[1]));
		if (n == 4) {
			MOVSS(tempxregs[2], fpr.V(sregs[2]));
			MOVSS(tempxregs[3], fpr.V(sregs[2]));
			ADDSS(tempxregs[2], fpr.V(sregs[3]));
			SUBSS(tempxregs[3], fpr.V(sregs[3]));
		}
	}

	// Copy back any lane that was computed in a register other than the one
	// currently backing its destination.
	for (int i = 0; i < n; ++i) {
		if (!fpr.V(dregs[i]).IsSimpleReg(tempxregs[i]))
			MOVSS(fpr.V(dregs[i]), tempxregs[i]);
	}

	ApplyPrefixD(dregs, sz);

	fpr.ReleaseSpillLocks();
}
static float sincostemp[2];
union u32float {

View File

@ -120,7 +120,7 @@ JitOptions::JitOptions()
continueBranches = false;
continueJumps = false;
continueMaxInstructions = 300;
enableVFPUSIMD = false;
enableVFPUSIMD = true;
}
#ifdef _MSC_VER

View File

@ -146,6 +146,7 @@ public:
void Comp_Vsgn(MIPSOpcode op);
void Comp_Vocp(MIPSOpcode op);
void Comp_ColorConv(MIPSOpcode op);
// Compile handler for the VFPU vbfy1/vbfy2 instructions (the instruction
// table dispatches both opcodes to Comp_Vbfy).
void Comp_Vbfy(MIPSOpcode op);
void Comp_DoNothing(MIPSOpcode op);

View File

@ -477,6 +477,8 @@ bool FPURegCache::TryMapDirtyInInVS(const u8 *vd, VectorSize vdsz, const u8 *vs,
if (!CanMapVS(vd, vdsz) || !CanMapVS(vs, vssz) || !CanMapVS(vt, vtsz)) {
return false;
}
// But, they could still fail based on overlap. Hopefully not common...
bool success = TryMapRegsVS(vs, vssz, 0);
if (success) {