mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-25 11:20:55 +00:00
Fix vf2i properly on x86.
This commit is contained in:
parent
6e5b4ca082
commit
8714240519
@ -1298,6 +1298,7 @@ void XEmitter::CVTSI2SS(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2A, false,
|
||||
// Emits CVTSS2SI (scalar float -> integer, result in a general-purpose register)
// by forwarding opcode byte 0x2D to WriteSSEOp.
void XEmitter::CVTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2D, false, xregdest, arg);}
|
||||
// Emits CVTTSS2SI, the truncating scalar float -> integer conversion (opcode byte 0x2C);
// the result lands in a general-purpose register.
void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);}
|
||||
// Emits CVTTPS2DQ (opcode byte 0x5B): truncating conversion of packed floats to packed
// 32-bit integers.
// NOTE(review): the third argument is false even though this is a packed instruction;
// presumably the (32, false) combination is what makes WriteSSEOp emit the F3 prefix
// this encoding needs - confirm against WriteSSEOp's prefix logic.
void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);}
|
||||
// Emits CVTTSD2SI, the truncating scalar double -> integer conversion; the result lands
// in a general-purpose register. Same opcode byte (0x2C) as CVTTSS2SI, but with a size
// argument of 64 (presumably selecting the double-precision prefix - confirm in WriteSSEOp).
void XEmitter::CVTTSD2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0x2C, false, xregdest, arg);}
|
||||
|
||||
// Emits MASKMOVDQU with two register operands; src is wrapped with R() to build the
// OpArg that WriteSSEOp expects.
void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));}
|
||||
|
||||
|
@ -581,6 +581,7 @@ public:
|
||||
void CVTSI2SS(X64Reg xregdest, OpArg arg); // Integer -> float: here the destination is an XMM register; the integer source (arg) is a GPR or memory operand.
|
||||
void CVTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX!
|
||||
void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX!
|
||||
void CVTTSD2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX!
|
||||
void CVTTPS2DQ(X64Reg regOp, OpArg arg); // Packed truncating float -> int32 conversion; unlike the scalar CVT*2SI forms, the destination is an XMM register.
|
||||
|
||||
// SSE2: Packed integer instructions
|
||||
|
@ -600,13 +600,12 @@ namespace MIPSInt
|
||||
EatPrefixes();
|
||||
}
|
||||
|
||||
// Rounds param with round_ieee_754 (defined outside this excerpt) and casts the result
// to int. Int_Vf2i uses it for rounding mode "n" (case 16).
// NOTE(review): the float and double signatures below appear to be the before/after
// lines of this change - only one of them exists in the actual source file.
inline int round_vfpu_n(float param) {
|
||||
inline int round_vfpu_n(double param) {
|
||||
// return floorf(param);
|
||||
return (int)round_ieee_754(param);
|
||||
}
|
||||
|
||||
void Int_Vf2i(u32 op)
|
||||
{
|
||||
void Int_Vf2i(u32 op) {
|
||||
float s[4];
|
||||
int d[4];
|
||||
int vd = _VD;
|
||||
@ -616,25 +615,28 @@ namespace MIPSInt
|
||||
VectorSize sz = GetVecSize(op);
|
||||
ReadVector(s, sz, vs);
|
||||
ApplySwizzleS(s, sz); //TODO: and the mask to kill everything but swizzle
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++)
|
||||
{
|
||||
for (int i = 0; i < GetNumVectorElements(sz); i++) {
|
||||
if (my_isnan(s[i])) {
|
||||
d[i] = 0x7FFFFFFF;
|
||||
continue;
|
||||
}
|
||||
double sv = s[i] * mult; // (float)0x7fffffff == (float)0x80000000
|
||||
// Cap/floor it to 0x7fffffff / 0x80000000
|
||||
if (sv > 0x7fffffff) sv = 0x7fffffff;
|
||||
if (sv < (int)0x80000000) sv = (int)0x80000000;
|
||||
if (sv > (double)0x7fffffff) {
|
||||
d[i] = 0x7fffffff;
|
||||
} else if (sv <= (double)(int)0x80000000) {
|
||||
d[i] = 0x80000000;
|
||||
} else {
|
||||
switch ((op >> 21) & 0x1f)
|
||||
{
|
||||
case 16: d[i] = (int)(floor(sv + 0.5f)); break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...)
|
||||
case 16: d[i] = (int)round_vfpu_n(sv); break; //(floor(sv + 0.5f)); break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...)
|
||||
case 17: d[i] = s[i]>=0 ? (int)floor(sv) : (int)ceil(sv); break; //z
|
||||
case 18: d[i] = (int)ceil(sv); break; //u
|
||||
case 19: d[i] = (int)floor(sv); break; //d
|
||||
default: d[i] = 0x7FFFFFFF; break;
|
||||
}
|
||||
}
|
||||
}
|
||||
ApplyPrefixD((float*)d, sz, true);
|
||||
WriteVector((float*)d, sz, vd);
|
||||
PC += 4;
|
||||
|
@ -975,26 +975,24 @@ void Jit::Comp_Vi2f(u32 op) {
|
||||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
|
||||
extern const float mulTableVf2i[32] = {
|
||||
(float)(1ULL<<0),(float)(1ULL<<1),(float)(1ULL<<2),(float)(1ULL<<3),
|
||||
(float)(1ULL<<4),(float)(1ULL<<5),(float)(1ULL<<6),(float)(1ULL<<7),
|
||||
(float)(1ULL<<8),(float)(1ULL<<9),(float)(1ULL<<10),(float)(1ULL<<11),
|
||||
(float)(1ULL<<12),(float)(1ULL<<13),(float)(1ULL<<14),(float)(1ULL<<15),
|
||||
(float)(1ULL<<16),(float)(1ULL<<17),(float)(1ULL<<18),(float)(1ULL<<19),
|
||||
(float)(1ULL<<20),(float)(1ULL<<21),(float)(1ULL<<22),(float)(1ULL<<23),
|
||||
(float)(1ULL<<24),(float)(1ULL<<25),(float)(1ULL<<26),(float)(1ULL<<27),
|
||||
(float)(1ULL<<28),(float)(1ULL<<29),(float)(1ULL<<30),(float)(1ULL<<31),
|
||||
// Scale factors 2^0 .. 2^31 stored as doubles. Comp_Vf2i indexes this table with the
// 5-bit immediate taken from bits 16-20 of the opcode and multiplies each source
// element by the selected entry before clamping and converting to integer.
extern const double mulTableVf2i[32] = {
|
||||
(1ULL<<0),(1ULL<<1),(1ULL<<2),(1ULL<<3),
|
||||
(1ULL<<4),(1ULL<<5),(1ULL<<6),(1ULL<<7),
|
||||
(1ULL<<8),(1ULL<<9),(1ULL<<10),(1ULL<<11),
|
||||
(1ULL<<12),(1ULL<<13),(1ULL<<14),(1ULL<<15),
|
||||
(1ULL<<16),(1ULL<<17),(1ULL<<18),(1ULL<<19),
|
||||
(1ULL<<20),(1ULL<<21),(1ULL<<22),(1ULL<<23),
|
||||
(1ULL<<24),(1ULL<<25),(1ULL<<26),(1ULL<<27),
|
||||
(1ULL<<28),(1ULL<<29),(1ULL<<30),(1ULL<<31),
|
||||
};
|
||||
|
||||
static const float half = 0.5f;
|
||||
|
||||
static const float maxIntAsFloat = (float)(int)0x7FFFFFFF;
|
||||
static const float minIntAsFloat = (float)(int)0x80000000;
|
||||
// Clamp bounds for the vf2i conversion, read by address from the emitted MINSD/MAXSD
// instructions. They are doubles because a float cannot represent 0x7fffffff exactly
// (as a float it rounds up to the same value as 0x80000000), which would make the
// upper clamp wrong. Declared const: they are never written, only read.
static const double maxIntAsDouble = (double)0x7fffffff;  // that's not equal to 0x80000000
static const double minIntAsDouble = (double)(int)0x80000000;
|
||||
|
||||
void Jit::Comp_Vf2i(u32 op) {
|
||||
CONDITIONAL_DISABLE;
|
||||
DISABLE; // Broken :( (KH)
|
||||
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
@ -1002,7 +1000,7 @@ void Jit::Comp_Vf2i(u32 op) {
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
int imm = (op >> 16) & 0x1f;
|
||||
const float *mult = &mulTableVf2i[imm];
|
||||
const double *mult = &mulTableVf2i[imm];
|
||||
|
||||
switch ((op >> 21) & 0x1f)
|
||||
{
|
||||
@ -1029,21 +1027,22 @@ void Jit::Comp_Vf2i(u32 op) {
|
||||
}
|
||||
|
||||
if (*mult != 1.0f)
|
||||
MOVSS(XMM1, M((void *)mult));
|
||||
MOVSD(XMM1, M((void *)mult));
|
||||
|
||||
fpr.MapRegsV(tempregs, sz, MAP_DIRTY | MAP_NOINIT);
|
||||
for (int i = 0; i < n; i++) {
|
||||
// Need to do this in double precision to clamp correctly as float
|
||||
// doesn't have enough precision to represent 0x7fffffff for example exactly.
|
||||
MOVSS(XMM0, fpr.V(sregs[i]));
|
||||
CVTSS2SD(XMM0, R(XMM0)); // convert to double precision
|
||||
if (*mult != 1.0f) {
|
||||
if (*mult != 1.0f)
|
||||
MULSS(XMM0, R(XMM1));
|
||||
MULSD(XMM0, R(XMM1));
|
||||
}
|
||||
// Clamp to max and min
|
||||
MINSS(XMM0, M((void *)&maxIntAsFloat));
|
||||
MAXSS(XMM0, M((void *)&minIntAsFloat));
|
||||
MINSD(XMM0, M((void *)&maxIntAsDouble));
|
||||
MAXSD(XMM0, M((void *)&minIntAsDouble));
|
||||
switch ((op >> 21) & 0x1f) {
|
||||
case 16: /* TODO */ break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...)
|
||||
case 17: CVTTSS2SI(EAX, R(XMM0)); break; //z - truncate
|
||||
case 17: CVTTSD2SI(EAX, R(XMM0)); break; //z - truncate
|
||||
case 18: /* TODO */ break; //u
|
||||
case 19: /* TODO */ break; //d
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user