JIT: Implement vf2i (truncate mode only)

This commit is contained in:
Henrik Rydgard 2013-08-06 19:08:06 +02:00
parent 993d3b01de
commit 201282f28c
3 changed files with 154 additions and 14 deletions

View File

@ -846,6 +846,73 @@ namespace MIPSComp
fpr.ReleaseSpillLocksAndDiscardTemps(); fpr.ReleaseSpillLocksAndDiscardTemps();
} }
// Emits ARM code for the vf2i op: each of the n source vector elements is
// multiplied by 2^imm and converted to an integer with round-toward-zero.
// Only conversion mode 17 (truncate) is compiled here; modes 16, 18 and 19
// go through DISABLE instead.
//
// op: the raw 32-bit instruction word. Bits 16-20 hold the scale exponent
//     (imm) and bits 21-25 hold the conversion mode.
void Jit::Comp_Vf2i(u32 op) {
CONDITIONAL_DISABLE;
// Prefixes whose value is not known are not handled by this path.
if (js.HasUnknownPrefix())
DISABLE;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
// Scale factor applied before conversion: 2^imm, with imm in 0..31.
int imm = (op >> 16) & 0x1f;
float mult = (float)(1ULL << imm);
// Filter on the conversion mode before emitting anything.
switch ((op >> 21) & 0x1f)
{
case 17:
break; //z - truncate. Easy to support.
case 16:
case 18:
case 19:
DISABLE;
break;
}
u8 sregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
// Choose where each lane's result is written first: directly into dregs[i]
// when IsOverlapSafe reports that is fine, otherwise into a fresh temp that
// is copied to dregs[i] after all lanes have been converted.
MIPSReg tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
tempregs[i] = fpr.GetTempV();
} else {
tempregs[i] = dregs[i];
}
}
// Load the scale factor into S1 once, outside the per-lane loop. Skipped
// entirely when the factor is 1.0 (imm == 0).
if (mult != 1.0f)
MOVI2F(S1, mult, R0);
for (int i = 0; i < n; i++) {
fpr.MapDirtyInV(tempregs[i], sregs[i]);
// NOTE(review): cases 16/18/19 below emit nothing; presumably they are
// unreachable because the DISABLE in the earlier switch bails out - confirm.
switch ((op >> 21) & 0x1f) {
case 16: /* TODO */ break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...)
case 17:
// NOTE(review): the VCVT flags carry no explicit signedness flag. Confirm
// that TO_INT | ROUND_TO_ZERO yields a signed conversion in this emitter,
// since negative inputs depend on it.
if (mult != 1.0f) {
// S0 is used as scratch for the scaled value so the source is not modified.
VMUL(S0, fpr.V(sregs[i]), S1);
VCVT(fpr.V(tempregs[i]), S0, TO_INT | ROUND_TO_ZERO);
} else {
VCVT(fpr.V(tempregs[i]), fpr.V(sregs[i]), TO_INT | ROUND_TO_ZERO);
}
break;
case 18: /* TODO */ break; //u
case 19: /* TODO */ break; //d
}
}
// Copy results that were staged in temps into their real destinations.
for (int i = 0; i < n; ++i) {
if (dregs[i] != tempregs[i]) {
fpr.MapDirtyInV(dregs[i], tempregs[i]);
VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
}
}
ApplyPrefixD(dregs, sz);
fpr.ReleaseSpillLocksAndDiscardTemps();
}
void Jit::Comp_Mftv(u32 op) void Jit::Comp_Mftv(u32 op)
{ {
CONDITIONAL_DISABLE; CONDITIONAL_DISABLE;
@ -1140,10 +1207,6 @@ namespace MIPSComp
DISABLE; DISABLE;
} }
// Stub for vf2i: the body consists solely of the DISABLE macro, so no
// vf2i-specific code is emitted by this definition.
void Jit::Comp_Vf2i(u32 op) {
DISABLE;
}
void Jit::Comp_VCrossQuat(u32 op) { void Jit::Comp_VCrossQuat(u32 op) {
// This op does not support prefixes. // This op does not support prefixes.
if (js.HasUnknownPrefix() || disablePrefixes) if (js.HasUnknownPrefix() || disablePrefixes)
@ -1192,8 +1255,6 @@ namespace MIPSComp
void Jit::Comp_Vsge(u32 op) { void Jit::Comp_Vsge(u32 op) {
DISABLE; DISABLE;
} }
void Jit::Comp_Vslt(u32 op) { void Jit::Comp_Vslt(u32 op) {
@ -1337,7 +1398,7 @@ namespace MIPSComp
void Jit::Comp_Vcmov(u32 op) { void Jit::Comp_Vcmov(u32 op) {
CONDITIONAL_DISABLE; CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix()) if (js.HasUnknownPrefix() || disablePrefixes)
DISABLE; DISABLE;
VectorSize sz = GetVecSize(op); VectorSize sz = GetVecSize(op);

View File

@ -45,7 +45,7 @@ void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
} }
static const ARMReg *GetMIPSAllocationOrder(int &count) { static const ARMReg *GetMIPSAllocationOrder(int &count) {
// We conservatively reserve both S0-S2 as scratch for now. // We reserve S0-S1 as scratch. Can afford two registers. Maybe even four, which could simplify some things.
static const ARMReg allocationOrder[] = { static const ARMReg allocationOrder[] = {
S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15 S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
}; };
@ -54,6 +54,8 @@ static const ARMReg *GetMIPSAllocationOrder(int &count) {
S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15, S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15,
S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31 S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
}; };
// Disabled the NEON path due to issues so we limit ourselves to 16.
if (false && cpu_info.bNEON) { if (false && cpu_info.bNEON) {
count = sizeof(allocationOrderNEON) / sizeof(const int); count = sizeof(allocationOrderNEON) / sizeof(const int);
return allocationOrderNEON; return allocationOrderNEON;

View File

@ -941,6 +941,15 @@ void Jit::Comp_Vi2f(u32 op) {
GetVectorRegsPrefixS(sregs, sz, _VS); GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD); GetVectorRegsPrefixD(dregs, sz, _VD);
int tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
tempregs[i] = fpr.GetTempV();
} else {
tempregs[i] = dregs[i];
}
}
if (*mult != 1.0f) if (*mult != 1.0f)
MOVSS(XMM1, M((void *)mult)); MOVSS(XMM1, M((void *)mult));
for (int i = 0; i < n; i++) { for (int i = 0; i < n; i++) {
@ -951,8 +960,80 @@ void Jit::Comp_Vi2f(u32 op) {
CVTSI2SS(XMM0, R(EAX)); CVTSI2SS(XMM0, R(EAX));
if (*mult != 1.0f) if (*mult != 1.0f)
MULSS(XMM0, R(XMM1)); MULSS(XMM0, R(XMM1));
fpr.MapRegV(dregs[i], MAP_DIRTY); fpr.MapRegV(tempregs[i], MAP_DIRTY);
MOVSS(fpr.V(dregs[i]), XMM0); MOVSS(fpr.V(tempregs[i]), XMM0);
}
for (int i = 0; i < n; ++i) {
if (dregs[i] != tempregs[i]) {
fpr.MapRegV(dregs[i], MAP_DIRTY | MAP_NOINIT);
MOVSS(fpr.VX(dregs[i]), fpr.V(tempregs[i]));
}
}
ApplyPrefixD(dregs, sz);
fpr.ReleaseSpillLocks();
}
// Scale factors used by Comp_Vf2i: entry k holds 2^k as a float, k = 0..31.
// Every value is a power of two, so each decimal literal below is exactly
// representable in single precision.
extern const float mulTableVf2i[32] = {
	1.0f,          2.0f,          4.0f,          8.0f,
	16.0f,         32.0f,         64.0f,         128.0f,
	256.0f,        512.0f,        1024.0f,       2048.0f,
	4096.0f,       8192.0f,       16384.0f,      32768.0f,
	65536.0f,      131072.0f,     262144.0f,     524288.0f,
	1048576.0f,    2097152.0f,    4194304.0f,    8388608.0f,
	16777216.0f,   33554432.0f,   67108864.0f,   134217728.0f,
	268435456.0f,  536870912.0f,  1073741824.0f, 2147483648.0f,
};
// Constant one-half; not referenced by any code visible in this hunk.
static const float half = 0.5f;
void Jit::Comp_Vf2i(u32 op) {
CONDITIONAL_DISABLE;
if (js.HasUnknownPrefix())
DISABLE;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
int imm = (op >> 16) & 0x1f;
const float *mult = &mulTableVf2i[imm];
switch ((op >> 21) & 0x1f)
{
case 17:
break; //z - truncate. Easy to support.
case 16:
case 18:
case 19:
DISABLE;
break;
}
u8 sregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
if (*mult != 1.0f)
MOVSS(XMM1, M((void *)mult));
for (int i = 0; i < n; i++) {
OpArg reg = fpr.V(sregs[i]);
if (*mult != 1.0f) {
MOVSS(XMM0, fpr.V(sregs[i]));
if (*mult != 1.0f)
MULSS(XMM0, R(XMM1));
reg = R(XMM0);
}
switch ((op >> 21) & 0x1f) {
case 16: /* TODO */ break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...)
case 17: CVTTSS2SI(EAX, reg); break; //z - truncate
case 18: /* TODO */ break; //u
case 19: /* TODO */ break; //d
}
fpr.StoreFromRegisterV(dregs[i]);
MOV(32, fpr.V(dregs[i]), R(EAX));
} }
ApplyPrefixD(dregs, sz); ApplyPrefixD(dregs, sz);
@ -1499,10 +1580,6 @@ void Jit::Comp_Vx2i(u32 op) {
DISABLE; DISABLE;
} }
// Stub for vf2i: the body consists solely of the DISABLE macro, so no
// vf2i-specific code is emitted by this definition.
void Jit::Comp_Vf2i(u32 op) {
DISABLE;
}
void Jit::Comp_Vhoriz(u32 op) { void Jit::Comp_Vhoriz(u32 op) {
DISABLE; DISABLE;
} }