mirror of
https://github.com/libretro/ppsspp.git
synced 2024-12-03 22:51:05 +00:00
JIT: Implement vf2i (truncate mode only)
This commit is contained in:
parent
993d3b01de
commit
201282f28c
@ -846,6 +846,73 @@ namespace MIPSComp
|
||||
fpr.ReleaseSpillLocksAndDiscardTemps();
|
||||
}
|
||||
|
||||
// JIT-compiles the VFPU vf2i family (float -> scaled integer) for ARM.
//
// Encoding fields read from `op`:
//   bits 16..20  imm  - scale exponent; each lane is multiplied by 2^imm
//                       before conversion.
//   bits 21..25  mode - rounding selector: 16 = n, 17 = z (truncate),
//                       18 = u, 19 = d.
//
// Only the truncating variant (mode 17) is compiled here. Every other
// selector value bails out through DISABLE, exactly like the unknown-prefix
// case, so nothing is emitted for an instruction we cannot convert.
void Jit::Comp_Vf2i(u32 op) {
	CONDITIONAL_DISABLE;

	if (js.HasUnknownPrefix())
		DISABLE;

	// Reject everything except 'z' up front. Previously only 16/18/19 were
	// rejected, so any other selector value fell through, mapped the
	// temporaries dirty without writing them and then copied that garbage
	// into the destination.
	// (round_vfpu_n causes issue #3011 but seems right according to tests...)
	if (((op >> 21) & 0x1f) != 17)
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);

	int imm = (op >> 16) & 0x1f;
	float mult = (float)(1ULL << imm);
	// mult is exactly 1.0f only when imm == 0; in that case the multiply is skipped.
	const bool scaled = mult != 1.0f;

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Pick where each lane's result is written first. When IsOverlapSafe
	// rejects writing dregs[i] directly (presumably because it aliases a
	// source lane that is still to be read - confirm against its
	// definition), the result goes to a temporary and is copied back later.
	MIPSReg tempregs[4];
	for (int i = 0; i < n; ++i) {
		if (IsOverlapSafe(dregs[i], i, n, sregs)) {
			tempregs[i] = dregs[i];
		} else {
			tempregs[i] = fpr.GetTempV();
		}
	}

	// S1 holds the scale factor for the whole vector; S0 is the per-lane
	// scratch for the product.
	if (scaled)
		MOVI2F(S1, mult, R0);

	for (int i = 0; i < n; i++) {
		fpr.MapDirtyInV(tempregs[i], sregs[i]);
		// IS_SIGNED requests the signed float->int convert; without it the
		// conversion is unsigned and negative inputs saturate to 0.
		// NOTE(review): flag name taken from the ARM emitter's VCVT flags -
		// confirm it matches the emitter in this tree.
		if (scaled) {
			VMUL(S0, fpr.V(sregs[i]), S1);
			VCVT(fpr.V(tempregs[i]), S0, TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		} else {
			VCVT(fpr.V(tempregs[i]), fpr.V(sregs[i]), TO_INT | IS_SIGNED | ROUND_TO_ZERO);
		}
	}

	// Copy results that had to go through a temporary into their real
	// destination lanes.
	for (int i = 0; i < n; ++i) {
		if (dregs[i] != tempregs[i]) {
			fpr.MapDirtyInV(dregs[i], tempregs[i]);
			VMOV(fpr.V(dregs[i]), fpr.V(tempregs[i]));
		}
	}

	ApplyPrefixD(dregs, sz);
	fpr.ReleaseSpillLocksAndDiscardTemps();
}
|
||||
|
||||
void Jit::Comp_Mftv(u32 op)
|
||||
{
|
||||
CONDITIONAL_DISABLE;
|
||||
@ -1140,10 +1207,6 @@ namespace MIPSComp
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
// Old placeholder for vf2i: the body is only DISABLE, i.e. this op had no
// JIT implementation (presumably DISABLE hands the op to a generic fallback -
// confirm against the macro's definition). The enclosing hunk shrinks from
// 10 to 6 lines, so this stub is what the commit removes in favour of the
// full Comp_Vf2i implementation earlier in the file.
void Jit::Comp_Vf2i(u32 op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
void Jit::Comp_VCrossQuat(u32 op) {
|
||||
// This op does not support prefixes.
|
||||
if (js.HasUnknownPrefix() || disablePrefixes)
|
||||
@ -1192,8 +1255,6 @@ namespace MIPSComp
|
||||
|
||||
// vsge is not compiled by this backend yet: the body is only DISABLE
// (presumably a fallback to a generic/non-JIT path - confirm against the
// macro's definition).
void Jit::Comp_Vsge(u32 op) {
|
||||
DISABLE;
|
||||
|
||||
|
||||
}
|
||||
|
||||
void Jit::Comp_Vslt(u32 op) {
|
||||
@ -1337,7 +1398,7 @@ namespace MIPSComp
|
||||
void Jit::Comp_Vcmov(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
if (js.HasUnknownPrefix())
|
||||
if (js.HasUnknownPrefix() || disablePrefixes)
|
||||
DISABLE;
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
|
@ -45,7 +45,7 @@ void ArmRegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
|
||||
}
|
||||
|
||||
static const ARMReg *GetMIPSAllocationOrder(int &count) {
|
||||
// We conservatively reserve both S0-S2 as scratch for now.
|
||||
// We reserve S0-S1 as scratch. Can afford two registers. Maybe even four, which could simplify some things.
|
||||
static const ARMReg allocationOrder[] = {
|
||||
S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15
|
||||
};
|
||||
@ -54,6 +54,8 @@ static const ARMReg *GetMIPSAllocationOrder(int &count) {
|
||||
S2, S3, S4, S5, S6, S7, S8, S9, S10, S11, S12, S13, S14, S15,
|
||||
S16, S17, S18, S19, S20, S21, S22, S23, S24, S25, S26, S27, S28, S29, S30, S31
|
||||
};
|
||||
|
||||
// Disabled the NEON path due to issues so we limit ourselves to 16.
|
||||
if (false && cpu_info.bNEON) {
|
||||
count = sizeof(allocationOrderNEON) / sizeof(const int);
|
||||
return allocationOrderNEON;
|
||||
|
@ -941,6 +941,15 @@ void Jit::Comp_Vi2f(u32 op) {
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
int tempregs[4];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!IsOverlapSafe(dregs[i], i, n, sregs)) {
|
||||
tempregs[i] = fpr.GetTempV();
|
||||
} else {
|
||||
tempregs[i] = dregs[i];
|
||||
}
|
||||
}
|
||||
|
||||
if (*mult != 1.0f)
|
||||
MOVSS(XMM1, M((void *)mult));
|
||||
for (int i = 0; i < n; i++) {
|
||||
@ -951,8 +960,80 @@ void Jit::Comp_Vi2f(u32 op) {
|
||||
CVTSI2SS(XMM0, R(EAX));
|
||||
if (*mult != 1.0f)
|
||||
MULSS(XMM0, R(XMM1));
|
||||
fpr.MapRegV(dregs[i], MAP_DIRTY);
|
||||
MOVSS(fpr.V(dregs[i]), XMM0);
|
||||
fpr.MapRegV(tempregs[i], MAP_DIRTY);
|
||||
MOVSS(fpr.V(tempregs[i]), XMM0);
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (dregs[i] != tempregs[i]) {
|
||||
fpr.MapRegV(dregs[i], MAP_DIRTY | MAP_NOINIT);
|
||||
MOVSS(fpr.VX(dregs[i]), fpr.V(tempregs[i]));
|
||||
}
|
||||
}
|
||||
|
||||
ApplyPrefixD(dregs, sz);
|
||||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
|
||||
// Scale factors indexed by a 5-bit exponent: entry i holds 2^i as a float,
// for i in [0, 31]. Every entry is a power of two, so each literal below is
// exactly representable in single precision.
extern const float mulTableVf2i[32] = {
	// 2^0 .. 2^7
	1.0f, 2.0f, 4.0f, 8.0f, 16.0f, 32.0f, 64.0f, 128.0f,
	// 2^8 .. 2^15
	256.0f, 512.0f, 1024.0f, 2048.0f, 4096.0f, 8192.0f, 16384.0f, 32768.0f,
	// 2^16 .. 2^23
	65536.0f, 131072.0f, 262144.0f, 524288.0f,
	1048576.0f, 2097152.0f, 4194304.0f, 8388608.0f,
	// 2^24 .. 2^31
	16777216.0f, 33554432.0f, 67108864.0f, 134217728.0f,
	268435456.0f, 536870912.0f, 1073741824.0f, 2147483648.0f,
};
|
||||
|
||||
// File-local constant 0.5f. Nothing in the visible code references it yet;
// presumably intended for the rounding modes of vf2i that are still TODO -
// confirm before relying on it.
static const float half = 0.5f;
|
||||
|
||||
void Jit::Comp_Vf2i(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
int imm = (op >> 16) & 0x1f;
|
||||
const float *mult = &mulTableVf2i[imm];
|
||||
|
||||
switch ((op >> 21) & 0x1f)
|
||||
{
|
||||
case 17:
|
||||
break; //z - truncate. Easy to support.
|
||||
case 16:
|
||||
case 18:
|
||||
case 19:
|
||||
DISABLE;
|
||||
break;
|
||||
}
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
if (*mult != 1.0f)
|
||||
MOVSS(XMM1, M((void *)mult));
|
||||
|
||||
for (int i = 0; i < n; i++) {
|
||||
OpArg reg = fpr.V(sregs[i]);
|
||||
if (*mult != 1.0f) {
|
||||
MOVSS(XMM0, fpr.V(sregs[i]));
|
||||
if (*mult != 1.0f)
|
||||
MULSS(XMM0, R(XMM1));
|
||||
reg = R(XMM0);
|
||||
}
|
||||
switch ((op >> 21) & 0x1f) {
|
||||
case 16: /* TODO */ break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...)
|
||||
case 17: CVTTSS2SI(EAX, reg); break; //z - truncate
|
||||
case 18: /* TODO */ break; //u
|
||||
case 19: /* TODO */ break; //d
|
||||
}
|
||||
fpr.StoreFromRegisterV(dregs[i]);
|
||||
MOV(32, fpr.V(dregs[i]), R(EAX));
|
||||
}
|
||||
|
||||
ApplyPrefixD(dregs, sz);
|
||||
@ -1499,10 +1580,6 @@ void Jit::Comp_Vx2i(u32 op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
// Old placeholder for vf2i in this backend: the body is only DISABLE, i.e.
// no JIT implementation (presumably DISABLE hands the op to a generic
// fallback - confirm against the macro's definition). The enclosing hunk
// shrinks from 10 to 6 lines, so this stub is what the commit removes now
// that Comp_Vf2i is implemented earlier in the file.
void Jit::Comp_Vf2i(u32 op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
// Vhoriz is not compiled by this backend yet: the body is only DISABLE
// (presumably a fallback to a generic/non-JIT path - confirm against the
// macro's definition).
void Jit::Comp_Vhoriz(u32 op) {
|
||||
DISABLE;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user