diff --git a/Common/x64Emitter.cpp b/Common/x64Emitter.cpp index 77813e7c4..94d470ba5 100644 --- a/Common/x64Emitter.cpp +++ b/Common/x64Emitter.cpp @@ -1298,6 +1298,7 @@ void XEmitter::CVTSI2SS(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2A, false, void XEmitter::CVTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2D, false, xregdest, arg);} void XEmitter::CVTTSS2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x2C, false, xregdest, arg);} void XEmitter::CVTTPS2DQ(X64Reg xregdest, OpArg arg) {WriteSSEOp(32, 0x5B, false, xregdest, arg);} +void XEmitter::CVTTSD2SI(X64Reg xregdest, OpArg arg) {WriteSSEOp(64, 0x2C, false, xregdest, arg);} void XEmitter::MASKMOVDQU(X64Reg dest, X64Reg src) {WriteSSEOp(64, sseMASKMOVDQU, true, dest, R(src));} diff --git a/Common/x64Emitter.h b/Common/x64Emitter.h index 825bcab9d..f82a1659e 100644 --- a/Common/x64Emitter.h +++ b/Common/x64Emitter.h @@ -581,6 +581,7 @@ public: void CVTSI2SS(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! void CVTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! void CVTTSS2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! + void CVTTSD2SI(X64Reg xregdest, OpArg arg); // Yeah, destination really is a GPR like EAX! void CVTTPS2DQ(X64Reg regOp, OpArg arg); // SSE2: Packed integer instructions diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index e26d18bb1..1d53f9cdc 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -600,13 +600,12 @@ namespace MIPSInt EatPrefixes(); } - inline int round_vfpu_n(float param) { + inline int round_vfpu_n(double param) { // return floorf(param); return (int)round_ieee_754(param); } - void Int_Vf2i(u32 op) - { + void Int_Vf2i(u32 op) { float s[4]; int d[4]; int vd = _VD; @@ -616,23 +615,26 @@ namespace MIPSInt VectorSize sz = GetVecSize(op); ReadVector(s, sz, vs); ApplySwizzleS(s, sz); //TODO: and the mask to kill everything but swizzle - for (int i = 0; i < GetNumVectorElements(sz); i++) - { + for (int i = 0; i < GetNumVectorElements(sz); i++) { if (my_isnan(s[i])) { d[i] = 0x7FFFFFFF; continue; } double sv = s[i] * mult; // (float)0x7fffffff == (float)0x80000000 // Cap/floor it to 0x7fffffff / 0x80000000 - if (sv > 0x7fffffff) sv = 0x7fffffff; - if (sv < (int)0x80000000) sv = (int)0x80000000; - switch ((op >> 21) & 0x1f) - { - case 16: d[i] = (int)(floor(sv + 0.5f)); break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...) - case 17: d[i] = s[i]>=0 ? (int)floor(sv) : (int)ceil(sv); break; //z - case 18: d[i] = (int)ceil(sv); break; //u - case 19: d[i] = (int)floor(sv); break; //d - default: d[i] = 0x7FFFFFFF; break; + if (sv > (double)0x7fffffff) { + d[i] = 0x7fffffff; + } else if (sv <= (double)(int)0x80000000) { + d[i] = 0x80000000; + } else { + switch ((op >> 21) & 0x1f) + { + case 16: d[i] = (int)round_vfpu_n(sv); break; //(floor(sv + 0.5f)); break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...) + case 17: d[i] = s[i]>=0 ? (int)floor(sv) : (int)ceil(sv); break; //z + case 18: d[i] = (int)ceil(sv); break; //u + case 19: d[i] = (int)floor(sv); break; //d + default: d[i] = 0x7FFFFFFF; break; + } } } ApplyPrefixD((float*)d, sz, true); diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 97e3e657e..100d4a833 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -975,26 +975,24 @@ void Jit::Comp_Vi2f(u32 op) { fpr.ReleaseSpillLocks(); } -extern const float mulTableVf2i[32] = { - (float)(1ULL<<0),(float)(1ULL<<1),(float)(1ULL<<2),(float)(1ULL<<3), - (float)(1ULL<<4),(float)(1ULL<<5),(float)(1ULL<<6),(float)(1ULL<<7), - (float)(1ULL<<8),(float)(1ULL<<9),(float)(1ULL<<10),(float)(1ULL<<11), - (float)(1ULL<<12),(float)(1ULL<<13),(float)(1ULL<<14),(float)(1ULL<<15), - (float)(1ULL<<16),(float)(1ULL<<17),(float)(1ULL<<18),(float)(1ULL<<19), - (float)(1ULL<<20),(float)(1ULL<<21),(float)(1ULL<<22),(float)(1ULL<<23), - (float)(1ULL<<24),(float)(1ULL<<25),(float)(1ULL<<26),(float)(1ULL<<27), - (float)(1ULL<<28),(float)(1ULL<<29),(float)(1ULL<<30),(float)(1ULL<<31), +extern const double mulTableVf2i[32] = { + (1ULL<<0),(1ULL<<1),(1ULL<<2),(1ULL<<3), + (1ULL<<4),(1ULL<<5),(1ULL<<6),(1ULL<<7), + (1ULL<<8),(1ULL<<9),(1ULL<<10),(1ULL<<11), + (1ULL<<12),(1ULL<<13),(1ULL<<14),(1ULL<<15), + (1ULL<<16),(1ULL<<17),(1ULL<<18),(1ULL<<19), + (1ULL<<20),(1ULL<<21),(1ULL<<22),(1ULL<<23), + (1ULL<<24),(1ULL<<25),(1ULL<<26),(1ULL<<27), + (1ULL<<28),(1ULL<<29),(1ULL<<30),(1ULL<<31), }; static const float half = 0.5f; -static const float maxIntAsFloat = (float)(int)0x7FFFFFFF; -static const float minIntAsFloat = (float)(int)0x80000000; +static double maxIntAsDouble = (double)0x7fffffff; // that's not equal to 0x80000000 +static double minIntAsDouble = (double)(int)0x80000000; void Jit::Comp_Vf2i(u32 op) { CONDITIONAL_DISABLE; - DISABLE; // Broken :( (KH) - if (js.HasUnknownPrefix()) DISABLE; @@ -1002,7 +1000,7 @@ void Jit::Comp_Vf2i(u32 op) { int n = GetNumVectorElements(sz); int imm = (op >> 16) & 0x1f; - const float *mult = &mulTableVf2i[imm]; + const double *mult = &mulTableVf2i[imm]; switch ((op >> 21) & 0x1f) { @@ -1029,21 +1027,22 @@ void Jit::Comp_Vf2i(u32 op) { } if (*mult != 1.0f) - MOVSS(XMM1, M((void *)mult)); + MOVSD(XMM1, M((void *)mult)); fpr.MapRegsV(tempregs, sz, MAP_DIRTY | MAP_NOINIT); for (int i = 0; i < n; i++) { + // Need to do this in double precision to clamp correctly as float + // doesn't have enough precision to represent 0x7fffffff for example exactly. MOVSS(XMM0, fpr.V(sregs[i])); + CVTSS2SD(XMM0, R(XMM0)); // convert to double precision if (*mult != 1.0f) { - if (*mult != 1.0f) - MULSS(XMM0, R(XMM1)); + MULSD(XMM0, R(XMM1)); } - // Clamp to max and min - MINSS(XMM0, M((void *)&maxIntAsFloat)); - MAXSS(XMM0, M((void *)&minIntAsFloat)); + MINSD(XMM0, M((void *)&maxIntAsDouble)); + MAXSD(XMM0, M((void *)&minIntAsDouble)); switch ((op >> 21) & 0x1f) { case 16: /* TODO */ break; //n (round_vfpu_n causes issue #3011 but seems right according to tests...) - case 17: CVTTSS2SI(EAX, R(XMM0)); break; //z - truncate + case 17: CVTTSD2SI(EAX, R(XMM0)); break; //z - truncate case 18: /* TODO */ break; //u case 19: /* TODO */ break; //d }