diff --git a/pcsx2/Misc.h b/pcsx2/Misc.h index 934e08e..9bf6f3f 100644 --- a/pcsx2/Misc.h +++ b/pcsx2/Misc.h @@ -208,7 +208,7 @@ __forceinline void memcpy_pcsx2(void* dest, const void* src, size_t n) //FreezeMMXRegs(1); // mmx not used FreezeXMMRegs(1); memcpy(dest, src, n); - // have to be unfroze by parent call! + // have to be unfrozen by parent call! } #else #define memcpy_pcsx2 memcpy @@ -221,7 +221,8 @@ __forceinline void memcpy_pcsx2(void* dest, const void* src, size_t n) #if defined(_WIN32) && !defined(__x86_64__) // faster memcpy void * memcpy_amd_(void *dest, const void *src, size_t n); -#define memcpy_fast memcpy_amd_ +//#define memcpy_fast memcpy_amd_ +#define memcpy_fast memcpy #else // for now disable linux fast memcpy #define memcpy_fast memcpy_pcsx2 @@ -269,8 +270,9 @@ extern __forceinline void pcsx2_aligned_free(void* pmem) // cross-platform atomic operations #if defined (_WIN32) - +/* #ifndef __x86_64__ // for some reason x64 doesn't like this + LONG __cdecl _InterlockedIncrement(LONG volatile *Addend); LONG __cdecl _InterlockedDecrement(LONG volatile *Addend); LONG __cdecl _InterlockedCompareExchange(LPLONG volatile Dest, LONG Exchange, LONG Comp); @@ -286,7 +288,7 @@ LONG __cdecl _InterlockedAnd(LPLONG volatile Addend, LONG Value); #pragma intrinsic (_InterlockedExchangeAdd) #define InterlockedExchangeAdd _InterlockedExchangeAdd - +*/ #else typedef void* PVOID; diff --git a/pcsx2/windows/CpuDlg.c b/pcsx2/windows/CpuDlg.c index 02c3181..32598fe 100644 --- a/pcsx2/windows/CpuDlg.c +++ b/pcsx2/windows/CpuDlg.c @@ -71,6 +71,7 @@ BOOL CALLBACK CpuDlgProc(HWND hW, UINT uMsg, WPARAM wParam, LPARAM lParam) if(cpucaps.hasStreamingSIMDExtensions) strcat(features,",SSE"); if(cpucaps.hasStreamingSIMD2Extensions) strcat(features,",SSE2"); if(cpucaps.hasStreamingSIMD3Extensions) strcat(features,",SSE3"); + if(cpucaps.hasStreamingSIMD4Extensions) strcat(features,",SSE4.1"); // if(cpucaps.has3DNOWInstructionExtensions) strcat(features,",3DNOW"); // 
if(cpucaps.has3DNOWInstructionExtensionsExt)strcat(features,",3DNOW+"); if(cpucaps.hasAMD64BitArchitecture) strcat(features,",x86-64"); diff --git a/pcsx2/x86/iCore.cpp b/pcsx2/x86/iCore.cpp index c9503aa..53a90ea 100644 --- a/pcsx2/x86/iCore.cpp +++ b/pcsx2/x86/iCore.cpp @@ -1315,6 +1315,41 @@ extern "C" void cpudetectSSE3(void* pfnCallSSE3) #endif } +extern "C" void cpudetectSSE4(void* pfnCallSSE4) +{ +return; + cpucaps.hasStreamingSIMD4Extensions = 1; + +#ifdef _MSC_VER + __try { + //__asm call pfnCallSSE4; + ((TESTFNPTR)pfnCallSSE4)(); + } + __except(EXCEPTION_EXECUTE_HANDLER) { + cpucaps.hasStreamingSIMD4Extensions = 0; +#ifdef PCSX2_VIRTUAL_MEM + // necessary since can potentially kill the custom handler + install_my_handler(); +#endif + } +#else // linux + +#ifdef PCSX2_FORCESSE4 + cpucaps.hasStreamingSIMD4Extensions = 1; +#else + // exception handling doesn't work, so disable for x86 builds of linux + cpucaps.hasStreamingSIMD4Extensions = 0; +#endif +// try { +// __asm__("call *%0" : : "m"(pfnCallSSE4) ); +// } +// catch(...) 
{ +// SysPrintf("no SSE4.1 found\n"); +// cpucaps.hasStreamingSIMD4Extensions = 0; +// } +#endif +} + struct BASEBLOCKS { // 0 - ee, 1 - iop diff --git a/pcsx2/x86/iCore.h b/pcsx2/x86/iCore.h index 36f476c..9720fd6 100644 --- a/pcsx2/x86/iCore.h +++ b/pcsx2/x86/iCore.h @@ -396,6 +396,7 @@ BASEBLOCKEX** GetAllBaseBlocks(int* pnum, int cpu); void SetMMXstate(); void cpudetectSSE3(void* pfnCallSSE3); +void cpudetectSSE4(void* pfnCallSSE4); void _recMove128MtoM(u32 to, u32 from); diff --git a/pcsx2/x86/iFPU.c b/pcsx2/x86/iFPU.c index fad9107..b939262 100644 --- a/pcsx2/x86/iFPU.c +++ b/pcsx2/x86/iFPU.c @@ -985,7 +985,7 @@ void recRSQRT_S_xmm(int info) switch(info & (PROCESS_EE_S|PROCESS_EE_T) ) { case PROCESS_EE_S: if( EEREC_D == EEREC_S ) { - ///SysPrintf("RSQRT1\n"); + SysPrintf("RSQRT1\n"); SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); } @@ -998,15 +998,19 @@ void recRSQRT_S_xmm(int info) break; case PROCESS_EE_T: - //SysPrintf("RSQRT3\n"); + SysPrintf("RSQRT3\n"); if(EEREC_D == EEREC_T) { SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); - } + }else if(EEREC_D == EEREC_S) { SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); } - else SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + else { + SysPrintf("RSQ3 Whoops\n"); + SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); + SSE_MOVSS_M32_to_XMM(EEREC_D, (uptr)&fpuRegs.fpr[_Fs_]); + } SSE_DIVSS_XMM_to_XMM(EEREC_D, t0reg); break; @@ -1034,7 +1038,10 @@ void recRSQRT_S_xmm(int info) SSE_SQRTSS_XMM_to_XMM(t0reg, EEREC_T); } else {*/ - SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + //SSE_MOVSS_XMM_to_XMM(t0reg, EEREC_D); + SSE_SQRTSS_M32_to_XMM(t0reg, (uptr)&fpuRegs.fpr[_Ft_]); + + //} SysPrintf("RSQRT4\n"); diff --git a/pcsx2/x86/iVUmicro.c b/pcsx2/x86/iVUmicro.c index 5a2ad96..011d288 100644 --- a/pcsx2/x86/iVUmicro.c +++ b/pcsx2/x86/iVUmicro.c @@ -133,20 +133,41 @@ static int SSEmovMask[ 16 ][ 4 ] = void VU_MERGE0(int dest, int 
src) { // 0000 } void VU_MERGE1(int dest, int src) { // 1000 - SSE_MOVHLPS_XMM_to_XMM(src, dest); - SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4); + if( cpucaps.hasStreamingSIMD4Extensions ) + { + SSE4_INSERTPS_XMM_to_XMM(dest, src, _MM_MK_INSERTPS_NDX(3, 3, 0)); + } + else + { + SSE_MOVHLPS_XMM_to_XMM(src, dest); + SSE_SHUFPS_XMM_to_XMM(dest, src, 0xc4); + } } void VU_MERGE2(int dest, int src) { // 0100 - SSE_MOVHLPS_XMM_to_XMM(src, dest); - SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64); + if( cpucaps.hasStreamingSIMD4Extensions ) + { + SSE4_INSERTPS_XMM_to_XMM(dest, src, _MM_MK_INSERTPS_NDX(2, 2, 0)); + } + else + { + SSE_MOVHLPS_XMM_to_XMM(src, dest); + SSE_SHUFPS_XMM_to_XMM(dest, src, 0x64); + } } void VU_MERGE3(int dest, int src) { // 1100 SSE_SHUFPS_XMM_to_XMM(dest, src, 0xe4); } void VU_MERGE4(int dest, int src) { // 0010s - SSE_MOVSS_XMM_to_XMM(src, dest); - SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4); - SSE_MOVAPS_XMM_to_XMM(dest, src); + if( cpucaps.hasStreamingSIMD4Extensions ) + { + SSE4_INSERTPS_XMM_to_XMM(dest, src, _MM_MK_INSERTPS_NDX(1, 1, 0)); + } + else + { + SSE_MOVSS_XMM_to_XMM(src, dest); + SSE_SHUFPS_XMM_to_XMM(src, dest, 0xe4); + SSE_MOVAPS_XMM_to_XMM(dest, src); + } } void VU_MERGE5(int dest, int src) { // 1010 SSE_SHUFPS_XMM_to_XMM(dest, src, 0xd8); @@ -221,6 +242,7 @@ void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw) } } else { +/* switch (xyzw) { case 0: SSE3_MOVSLDUP_XMM_to_XMM(dstreg, srcreg); @@ -239,35 +261,60 @@ void _unpackVF_xyzw(int dstreg, int srcreg, int xyzw) SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg); break; } +*/ + switch (xyzw) { + case 0: + SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x00); + break; + case 1: + SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0x55); + break; + case 2: + SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xaa); + break; + case 3: + SSE2_PSHUFD_XMM_to_XMM(dstreg, srcreg, 0xff); + break; + } } } void _unpackVFSS_xyzw(int dstreg, int srcreg, int xyzw) { - switch (xyzw) { - case 0: - if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, 
srcreg); - break; - case 1: - if( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg); - else { + if( cpucaps.hasStreamingSIMD4Extensions ) { + switch (xyzw) { + case 0: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(0, 0, 0)); break; + case 1: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(1, 0, 0)); break; + case 2: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(2, 0, 0)); break; + case 3: SSE4_INSERTPS_XMM_to_XMM(dstreg, srcreg, _MM_MK_INSERTPS_NDX(3, 0, 0)); break; + } + } + else { + switch (xyzw) { + case 0: if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); - SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x55); - } - break; - case 2: - SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); - break; - case 3: - if( cpucaps.hasStreamingSIMD3Extensions && dstreg != srcreg ) { - SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg); - SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg); - } - else { - if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); - SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xff); - } - break; + break; + case 1: + if( cpucaps.hasStreamingSIMD3Extensions ) SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg); + else { + if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); + SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0x55); + } + break; + case 2: + SSE_MOVHLPS_XMM_to_XMM(dstreg, srcreg); + break; + case 3: + if( cpucaps.hasStreamingSIMD3Extensions && dstreg != srcreg ) { + SSE3_MOVSHDUP_XMM_to_XMM(dstreg, srcreg); + SSE_MOVHLPS_XMM_to_XMM(dstreg, dstreg); + } + else { + if( dstreg != srcreg ) SSE_MOVAPS_XMM_to_XMM(dstreg, srcreg); + SSE_SHUFPS_XMM_to_XMM(dstreg, dstreg, 0xff); + } + break; + } } } @@ -927,6 +974,7 @@ int _vuGetTempXMMreg(int info) if( _hasFreeXMMreg() ) { t1reg = _allocTempXMMreg(XMMT_FPS, -1); + /* if( t1reg == EEREC_TEMP && _hasFreeXMMreg() ) { int t = _allocTempXMMreg(XMMT_FPS, -1); _freeXMMreg(t1reg); @@ -937,6 +985,18 @@ int _vuGetTempXMMreg(int info) _freeXMMreg(t1reg); t1reg = -1; } + */ + 
if( t1reg == EEREC_TEMP ) { + if( _hasFreeXMMreg() ) { + int t = _allocTempXMMreg(XMMT_FPS, -1); + _freeXMMreg(t1reg); + t1reg = t; + } + else { + _freeXMMreg(t1reg); + t1reg = -1; + } + } } return t1reg; @@ -3073,6 +3133,8 @@ void recVUMI_CLIP(VURegs *VU, int info) void recVUMI_DIV(VURegs *VU, int info) { + int t1reg; + if( _Fs_ == 0 ) { if( _Ft_ == 0 ) { @@ -3101,10 +3163,31 @@ void recVUMI_DIV(VURegs *VU, int info) // don't use RCPSS (very bad precision) SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); - if( _Ftf_ == 0 || (xmmregs[EEREC_T].mode & MODE_WRITE) ) { - if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(2*_Ftf_))&0xff); - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); - if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(8-2*_Ftf_))&0xff); + if( _Ftf_ != 0 || (xmmregs[EEREC_T].mode & MODE_WRITE) ) + { + if( _Ftf_ ) + { + t1reg = _vuGetTempXMMreg(info); + + if( t1reg >= 0 ) + { + _unpackVFSS_xyzw(t1reg, EEREC_T, _Ftf_); + + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); + + _freeXMMreg(t1reg); + } + else + { + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(2*_Ftf_))&0xff); + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(8-2*_Ftf_))&0xff); // revert + } + } + else + { + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + } } else { SSE_DIVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[_Ft_].UL[_Ftf_]); @@ -3136,14 +3219,33 @@ void recVUMI_DIV(VURegs *VU, int info) return; } + if( _Fsf_ == 0 ) SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); else _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); - if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(2*_Ftf_))&0xff); - SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + if( _Ftf_ ) + { + t1reg = _vuGetTempXMMreg(info); - // revert - if( _Ftf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(8-2*_Ftf_))&0xff); + if( t1reg >= 0 ) + { + _unpackVFSS_xyzw(t1reg, EEREC_T, _Ftf_); + + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, t1reg); + + _freeXMMreg(t1reg); + } + else + 
{ + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(2*_Ftf_))&0xff); + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + SSE_SHUFPS_XMM_to_XMM(EEREC_T, EEREC_T, (0xe4e4>>(8-2*_Ftf_))&0xff); // revert + } + } + else + { + SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_T); + } } //if( !CHECK_FORCEABS ) { @@ -3226,11 +3328,11 @@ void recVUMI_RSQRT(VURegs *VU, int info) if( _Fsf_ == 3 ) { if(_Ft_ != 0 ||_Ftf_ == 3 ) { - //SysPrintf("_Fs_ = 0.3 _Ft_ != 0 || _Ft_ = 0.3 \n"); + SysPrintf("_Fs_ = 0.3 _Ft_ != 0 || _Ft_ = 0.3 \n"); SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); //Dont use RSQRT, terrible accuracy SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_Q, 0), EEREC_TEMP); - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - _unpackVF_xyzw(EEREC_TEMP, EEREC_TEMP, _Fsf_); + //SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + _unpackVF_xyzw(EEREC_TEMP, EEREC_S, _Fsf_); SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_Q, 0)); @@ -3283,7 +3385,7 @@ void recVUMI_RSQRT(VURegs *VU, int info) } } - //SysPrintf("Normal RSQRT\n"); + SysPrintf("Normal RSQRT\n"); SSE_RSQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); if( _Fsf_ ) SSE_SHUFPS_XMM_to_XMM(EEREC_S, EEREC_S, (0xe4e4>>(2*_Fsf_))&0xff); SSE_MULSS_XMM_to_XMM(EEREC_TEMP, EEREC_S); @@ -4681,23 +4783,32 @@ void recVUMI_WAITP(VURegs *VU, int info) // in all EFU insts, EEREC_D is a temp reg void vuSqSumXYZ(int regd, int regs, int regtemp) { - SSE_MOVAPS_XMM_to_XMM(regtemp, regs); - SSE_MULPS_XMM_to_XMM(regtemp, regtemp); + if( cpucaps.hasStreamingSIMD4Extensions ) + { + SSE_MOVAPS_XMM_to_XMM(regd, regs); + SSE4_DPPS_XMM_to_XMM(regd, regd, 0x71); + } + else + { + SSE_MOVAPS_XMM_to_XMM(regtemp, regs); + SSE_MULPS_XMM_to_XMM(regtemp, regtemp); - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(regd, regtemp); - SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = x+y+z - SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x + if( cpucaps.hasStreamingSIMD3Extensions ) { + SSE3_HADDPS_XMM_to_XMM(regd, regtemp); + SSE_ADDPS_XMM_to_XMM(regd, regtemp); // regd.z = 
x+y+z + SSE_MOVHLPS_XMM_to_XMM(regd, regd); // move to x + } + else { + SSE_MOVSS_XMM_to_XMM(regd, regtemp); + SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xE1); + SSE_ADDSS_XMM_to_XMM(regd, regtemp); + SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); + SSE_ADDSS_XMM_to_XMM(regd, regtemp); + SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6); + } } - else { - SSE_MOVSS_XMM_to_XMM(regd, regtemp); - SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xE1); - SSE_ADDSS_XMM_to_XMM(regd, regtemp); - SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xD2); - SSE_ADDSS_XMM_to_XMM(regd, regtemp); - SSE_SHUFPS_XMM_to_XMM(regtemp, regtemp, 0xC6); - } - //SysPrintf("SUMXYZ\n"); + + //SysPrintf("SUMXYZ\n"); } void recVUMI_ESADD( VURegs *VU, int info) @@ -4717,24 +4828,34 @@ void recVUMI_ESADD( VURegs *VU, int info) void recVUMI_ERSADD( VURegs *VU, int info ) { assert( VU == &VU1 ); - // almost same as vuSqSumXYZ - SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); - SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - if( cpucaps.hasStreamingSIMD3Extensions ) { - SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_D); // move to x + // almost same as vuSqSumXYZ + + if( cpucaps.hasStreamingSIMD4Extensions ) + { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE4_DPPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x71); } - else { - SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); - SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); - SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); + else + { + SSE_MOVAPS_XMM_to_XMM(EEREC_TEMP, EEREC_S); + SSE_MULPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); + + if( cpucaps.hasStreamingSIMD3Extensions ) { + SSE3_HADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_ADDPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); // EEREC_D.z = x+y+z + SSE_MOVHLPS_XMM_to_XMM(EEREC_D, 
EEREC_D); // move to x + } + else { + SSE_MOVHLPS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + SSE_SHUFPS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP, 0x55); + SSE_ADDSS_XMM_to_XMM(EEREC_D, EEREC_TEMP); + } } + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); + // don't use RCPSS (very bad precision) SSE_DIVSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); CheckForOverflowSS_(EEREC_TEMP, EEREC_D); @@ -4756,9 +4877,9 @@ void recVUMI_ELENG( VURegs *VU, int info ) void recVUMI_ERLENG( VURegs *VU, int info ) { assert( VU == &VU1 ); - vuSqSumXYZ(EEREC_D, EEREC_S, EEREC_TEMP); - //SysPrintf("ERLENG\n"); - SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_D); + vuSqSumXYZ(EEREC_TEMP, EEREC_S, EEREC_TEMP); //Dont want to use EEREC_D incase it overwrites something + SysPrintf("ERLENG\n"); + SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 0)); @@ -4946,7 +5067,9 @@ void recVUMI_ERSQRT( VURegs *VU, int info ) //SSE_CMPNESS_XMM_to_XMM(EEREC_D, EEREC_TEMP); SysPrintf("ERSQRT\n"); SSE_SQRTSS_XMM_to_XMM(EEREC_TEMP, EEREC_TEMP); - SSE_DIVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); + SSE_MOVSS_XMM_to_M32(VU_VI_ADDR(REG_P, 0), EEREC_TEMP); + SSE_MOVSS_M32_to_XMM(EEREC_TEMP, (uptr)&VU->VF[0].UL[3]); + SSE_DIVSS_M32_to_XMM(EEREC_TEMP, VU_VI_ADDR(REG_P, 0)); //SSE_ANDPS_XMM_to_XMM(EEREC_TEMP, EEREC_D); } else { diff --git a/pcsx2/x86/ix86-32/iR5900-32.c b/pcsx2/x86/ix86-32/iR5900-32.c index 39f8d35..239fc52 100644 --- a/pcsx2/x86/ix86-32/iR5900-32.c +++ b/pcsx2/x86/ix86-32/iR5900-32.c @@ -1,3316 +1,3323 @@ -/* Pcsx2 - Pc Ps2 Emulator - * Copyright (C) 2002-2005 Pcsx2 Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) 
any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -// recompiler reworked to add dynamic linking zerofrog(@gmail.com) Jan06 -// Recompiled completely rewritten to add block level recompilation/reg-caching/ -// liveness analysis/constant propagation Apr06 (zerofrog@gmail.com) - -// stop compiling if NORECBUILD build (only for Visual Studio) -#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) - -#include -#include -#include -#include - -#include "Common.h" -#include "Memory.h" -#include "InterTables.h" -#include "ix86/ix86.h" -#include "iR5900.h" -#include "iR5900AritImm.h" -#include "iR5900Arit.h" -#include "iR5900MultDiv.h" -#include "iR5900Shift.h" -#include "iR5900Branch.h" -#include "iR5900Jump.h" -#include "iR5900LoadStore.h" -#include "iR5900Move.h" -#include "iMMI.h" -#include "iFPU.h" -#include "iCP0.h" -#include "iVUmicro.h" -#include "iVU0micro.h" -#include "iVU1micro.h" -#include "VU.h" -#include "VUmicro.h" - -#include "iVUzerorec.h" - -#ifdef _WIN32 -#pragma warning(disable:4244) -#pragma warning(disable:4761) -#endif - -u32 maxrecmem = 0; -uptr *recLUT; - -#define X86 -#define RECSTACK_SIZE 0x00010000 - -#define EE_NUMBLOCKS (1<<15) - -static char *recMem = NULL; // the recompiled blocks will be here -static char* recStack = NULL; // stack mem -static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here -static BASEBLOCK *recROM = NULL; // and here -static BASEBLOCK *recROM1 = NULL; // also here -static BASEBLOCKEX *recBlocks = NULL; -static char *recPtr = NULL, *recStackPtr = NULL; -static EEINST* 
s_pInstCache = NULL; -static u32 s_nInstCacheSize = 0; - -u32 g_EEFreezeRegs = 0; // if set, should freeze the regs - -static BASEBLOCK* s_pCurBlock = NULL; -static BASEBLOCKEX* s_pCurBlockEx = NULL; -static BASEBLOCK* s_pDispatchBlock = NULL; -static u32 s_nEndBlock = 0; // what pc the current block ends -static u32 s_nHasDelay = 0; - -static u32 s_nNextBlock = 0; // next free block in recBlocks - -extern void (*recBSC[64])(); -extern void (*recBSC_co[64])(); -void rpropBSC(EEINST* prev, EEINST* pinst); - -// save states for branches -static u16 s_savex86FpuState, s_saveiCWstate; -static GPR_reg64 s_ConstGPRreg; -static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg = 0, s_saveRegHasLive1 = 0, s_saveRegHasSignExt = 0; -static EEINST* s_psaveInstInfo = NULL; - -u32 s_nBlockCycles = 0; // cycles of current block recompiling -static u32 s_savenBlockCycles = 0; - -void recCOP2RecompileInst(); -int recCOP2AnalyzeBlock(u32 startpc, u32 endpc); -void recCOP2EndBlock(void); - -#ifdef _DEBUG -u32 dumplog = 0; -#else -#define dumplog 0 -#endif - -u32 pc; // recompiler pc -int branch; // set for branch - -//#ifdef PCSX2_DEVBUILD -LARGE_INTEGER lbase = {0}, lfinal = {0}; -static u32 s_startcount = 0; -//#endif - -char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n"; -char *txt0RC = "EAX = %x : EBX = %x : ECX = %x : EDX = %x : ESI = %x : EDI = %x\n"; -char *txt1 = "REG[%d] = %x_%x\n"; -char *txt2 = "M32 = %x\n"; - -void _cop2AnalyzeOp(EEINST* pinst, int dostalls); // reccop2.c -static void iBranchTest(u32 newpc, u32 cpuBranch); -void recRecompile( u32 startpc ); -void recCOP22( void ); - -BASEBLOCKEX* PC_GETBLOCKEX(BASEBLOCK* p) -{ -// BASEBLOCKEX* pex = *(BASEBLOCKEX**)(p+1); -// if( pex >= recBlocks && pex < recBlocks+EE_NUMBLOCKS ) -// return pex; - - // otherwise, use the sorted list - return GetBaseBlockEx(p->startpc, 0); -} - -//////////////////////////////////////////////////// -void iDumpBlock( int startpc, char * ptr ) -{ - FILE *f; - char 
filename[ 256 ]; - u32 i, j; - EEINST* pcur; - extern char *disRNameGPR[]; - u8 used[34]; - u8 fpuused[33]; - int numused, count, fpunumused; - - SysPrintf( "dump1 %x:%x, %x\n", startpc, pc, cpuRegs.cycle ); -#ifdef _WIN32 - CreateDirectory("dumps", NULL); - sprintf( filename, "dumps\\dump%.8X.txt", startpc); -#else - mkdir("dumps", 0755); - sprintf( filename, "dumps/dump%.8X.txt", startpc); -#endif - - fflush( stdout ); -// f = fopen( "dump1", "wb" ); -// fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f ); -// fclose( f ); -// -// sprintf( command, "objdump -D --target=binary --architecture=i386 dump1 > %s", filename ); -// system( command ); - - f = fopen( filename, "w" ); - - if( disR5900GetSym(startpc) != NULL ) - fprintf(f, "%s\n", disR5900GetSym(startpc)); - for ( i = startpc; i < s_nEndBlock; i += 4 ) { - fprintf( f, "%s\n", disR5900Fasm( PSMu32( i ), i ) ); - } - - // write the instruction info - - fprintf(f, "\n\nlive0 - %x, live1 - %x, live2 - %x, lastuse - %x\nmmx - %x, xmm - %x, used - %x\n", - EEINST_LIVE0, EEINST_LIVE1, EEINST_LIVE2, EEINST_LASTUSE, EEINST_MMX, EEINST_XMM, EEINST_USED); - - memset(used, 0, sizeof(used)); - numused = 0; - for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { - if( s_pInstCache->regs[i] & EEINST_USED ) { - used[i] = 1; - numused++; - } - } - - memset(fpuused, 0, sizeof(fpuused)); - fpunumused = 0; - for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) { - if( s_pInstCache->fpuregs[i] & EEINST_USED ) { - fpuused[i] = 1; - fpunumused++; - } - } - - fprintf(f, " "); - for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { - if( used[i] ) fprintf(f, "%2d ", i); - } - for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) { - if( fpuused[i] ) fprintf(f, "%2d ", i); - } - fprintf(f, "\n"); - - fprintf(f, " "); - for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { - if( used[i] ) fprintf(f, "%s ", disRNameGPR[i]); - } - for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) { - if( fpuused[i] ) fprintf(f, "%s ", i<32?"FR":"FA"); - } - 
fprintf(f, "\n"); - - pcur = s_pInstCache+1; - for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { - fprintf(f, "%2d: %2.2x ", i+1, pcur->info); - - count = 1; - for(j = 0; j < ARRAYSIZE(s_pInstCache->regs); j++) { - if( used[j] ) { - fprintf(f, "%2.2x%s", pcur->regs[j], ((count%8)&&countfpuregs); j++) { - if( fpuused[j] ) { - fprintf(f, "%2.2x%s", pcur->fpuregs[j], ((count%8)&&count>26) { - case 26: // ldl - case 27: // ldr - case 32: case 33: case 34: case 35: case 36: case 37: case 38: case 39: - case 55: // LD - case 30: // lq - return ((tempcode>>21)&0x1f)==((tempcode>>16)&0x1f); // rs==rt - } - return 0; -} - -u8 _eeIsLoadStoreCoIssue(u32 firstcode, u32 secondcode) -{ - switch(firstcode>>26) { - case 34: // lwl - return (secondcode>>26)==38; - case 38: // lwr - return (secondcode>>26)==34; - case 42: // swl - return (secondcode>>26)==46; - case 46: // swr - return (secondcode>>26)==42; - case 26: // ldl - return (secondcode>>26)==27; - case 27: // ldr - return (secondcode>>26)==26; - case 44: // sdl - return (secondcode>>26)==45; - case 45: // sdr - return (secondcode>>26)==44; - - case 32: case 33: case 35: case 36: case 37: case 39: - case 55: // LD - - // stores - case 40: case 41: case 43: - case 63: // sd - return (secondcode>>26)==(firstcode>>26); - - case 30: // lq - case 31: // sq - case 49: // lwc1 - case 57: // swc1 - case 54: // lqc2 - case 62: // sqc2 - return (secondcode>>26)==(firstcode>>26)&&cpucaps.hasStreamingSIMDExtensions; - } - return 0; -} - -u8 _eeIsLoadStoreCoX(u32 tempcode) -{ - switch( tempcode>>26 ) { - case 30: case 31: case 49: case 57: case 55: case 63: - return 1; - } - return 0; -} - -void _eeFlushAllUnused() -{ - int i; - for(i = 0; i < 34; ++i) { - if( pc < s_nEndBlock ) { - if( (g_pCurInstInfo[1].regs[i]&EEINST_USED) ) - continue; - } - else if( (g_pCurInstInfo[0].regs[i]&EEINST_USED) ) - continue; - - if( i < 32 && GPR_IS_CONST1(i) ) _flushConstReg(i); - else { - _deleteMMXreg(MMX_GPR+i, 1); - _deleteGPRtoXMMreg(i, 1); - 
} - } - - //TODO when used info is done for FPU and VU0 - for(i = 0; i < XMMREGS; ++i) { - if( xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG ) - _freeXMMreg(i); - } -} - -u32* _eeGetConstReg(int reg) -{ - assert( GPR_IS_CONST1( reg ) ); - - if( g_cpuFlushedConstReg & (1<regs[xmmregs[i].reg]&EEINST_USED) ) { - if( !_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, xmmregs[i].reg) ) { - _freeXMMreg(i); - xmmregs[i].inuse = 1; - return 1; - } - } - } - - return 0; -} - -int _flushMMXunused() -{ - int i; - for (i=0; iregs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) { - if( !_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR) ) { - _freeMMXreg(i); - mmxregs[i].inuse = 1; - return 1; - } - } - } - - return 0; -} - -int _flushUnusedConstReg() -{ - int i; - for(i = 1; i < 32; ++i) { - if( (g_cpuHasConstReg & (1<regs[reg]&EEINST_LASTUSE) ) { - if( usemmx ) return _allocMMXreg(-1, MMX_GPR+reg, mode); - return _allocGPRtoXMMreg(-1, reg, mode); - } - - return -1; -} - -#define PROCESS_EE_SETMODES(mmreg) ((mmxregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITES:0) -#define PROCESS_EE_SETMODET(mmreg) ((mmxregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0) - -// ignores XMMINFO_READS, XMMINFO_READT, and XMMINFO_READD_LO from xmminfo -// core of reg caching -void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo) -{ - int mmreg1, mmreg2, mmreg3, mmtemp, moded; - - if ( ! 
_Rd_ && (xmminfo&XMMINFO_WRITED) ) return; - - if( xmminfo&XMMINFO_WRITED) { - CHECK_SAVE_REG(_Rd_); - _eeProcessHasLive(_Rd_, 0); - EEINST_RESETSIGNEXT(_Rd_); - } - - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { - if( xmminfo & XMMINFO_WRITED ) { - _deleteMMXreg(MMX_GPR+_Rd_, 2); - _deleteGPRtoXMMreg(_Rd_, 2); - } - if( xmminfo&XMMINFO_WRITED ) GPR_SET_CONST(_Rd_); - constcode(); - return; - } - - moded = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); - - // test if should write mmx - if( g_pCurInstInfo->info & EEINST_MMX ) { - - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededMMXreg(MMX_GPR+MMX_LO); - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededMMXreg(MMX_GPR+MMX_HI); - _addNeededMMXreg(MMX_GPR+_Rs_); - _addNeededMMXreg(MMX_GPR+_Rt_); - - if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) { - int creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_; - int vreg = creg == _Rs_ ? _Rt_ : _Rs_; - -// if(g_pCurInstInfo->regs[vreg]&EEINST_MMX) { -// mmreg1 = _allocMMXreg(-1, MMX_GPR+vreg, MODE_READ); -// _addNeededMMXreg(MMX_GPR+vreg); -// } - mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, vreg, MODE_READ); - - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_MMX; - - if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1); - else info |= PROCESS_EE_SETMODES(mmreg1); - - if( xmminfo & XMMINFO_WRITED ) { - _addNeededMMXreg(MMX_GPR+_Rd_); - mmreg3 = _checkMMXreg(MMX_GPR+_Rd_, moded); - - if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVE64(vreg)) ) { - if( EEINST_ISLIVE64(vreg) ) { - _freeMMXreg(mmreg1); - if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET; - else info &= ~PROCESS_EE_MODEWRITES; - } - _deleteGPRtoXMMreg(_Rd_, 2); - mmxregs[mmreg1].inuse = 1; - mmxregs[mmreg1].reg = _Rd_; - mmxregs[mmreg1].mode = moded; - mmreg3 = mmreg1; - } - else if( mmreg3 < 0 ) mmreg3 = _allocMMXreg(-1, MMX_GPR+_Rd_, moded); - - info |= PROCESS_EE_SET_D(mmreg3); - } - - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) 
) { - mmtemp = eeProcessHILO(MMX_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); - } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - mmtemp = eeProcessHILO(MMX_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); - } - - SetMMXstate(); - if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1)); - else consttcode(info|PROCESS_EE_SET_S(mmreg1)); - _clearNeededMMXregs(); - if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); - return; - } - } - else { - // no const regs - mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rs_, MODE_READ); - mmreg2 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_READ); - - if( mmreg1 >= 0 || mmreg2 >= 0 ) { - int info = PROCESS_EE_MMX; - - // do it all in mmx - if( mmreg1 < 0 ) mmreg1 = _allocMMXreg(-1, MMX_GPR+_Rs_, MODE_READ); - if( mmreg2 < 0 ) mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ); - - info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2); - - // check for last used, if so don't alloc a new MMX reg - if( xmminfo & XMMINFO_WRITED ) { - _addNeededMMXreg(MMX_GPR+_Rd_); - mmreg3 = _checkMMXreg(MMX_GPR+_Rd_, moded); - - if( mmreg3 < 0 ) { - if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_)) ) { - if( EEINST_ISLIVE64(_Rt_) ) { - _freeMMXreg(mmreg2); - info &= ~PROCESS_EE_MODEWRITET; - } - _deleteGPRtoXMMreg(_Rd_, 2); - mmxregs[mmreg2].inuse = 1; - mmxregs[mmreg2].reg = _Rd_; - mmxregs[mmreg2].mode = moded; - mmreg3 = mmreg2; - } - else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rs_)) ) { - if( EEINST_ISLIVE64(_Rs_) ) { - _freeMMXreg(mmreg1); - info &= ~PROCESS_EE_MODEWRITES; - } - _deleteGPRtoXMMreg(_Rd_, 2); - mmxregs[mmreg1].inuse = 1; - mmxregs[mmreg1].reg = _Rd_; - mmxregs[mmreg1].mode = moded; - mmreg3 = 
mmreg1; - } - else mmreg3 = _allocMMXreg(-1, MMX_GPR+_Rd_, moded); - } - - info |= PROCESS_EE_SET_D(mmreg3); - } - - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - mmtemp = eeProcessHILO(MMX_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); - } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - mmtemp = eeProcessHILO(MMX_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); - } - - SetMMXstate(); - noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); - _clearNeededMMXregs(); - if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); - return; - } - } - - _clearNeededMMXregs(); - } - - // test if should write xmm, mirror to mmx code - if( g_pCurInstInfo->info & EEINST_XMM ) { - - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededGPRtoXMMreg(XMMGPR_LO); - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededGPRtoXMMreg(XMMGPR_HI); - _addNeededGPRtoXMMreg(_Rs_); - _addNeededGPRtoXMMreg(_Rt_); - - if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) { - int creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_; - int vreg = creg == _Rs_ ? 
_Rt_ : _Rs_; - -// if(g_pCurInstInfo->regs[vreg]&EEINST_XMM) { -// mmreg1 = _allocGPRtoXMMreg(-1, vreg, MODE_READ); -// _addNeededGPRtoXMMreg(vreg); -// } - mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ); - - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_XMM; - - if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1); - else info |= PROCESS_EE_SETMODES(mmreg1); - - if( xmminfo & XMMINFO_WRITED ) { - - _addNeededGPRtoXMMreg(_Rd_); - mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); - - if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)) ) { - _freeXMMreg(mmreg1); - if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET; - else info &= ~PROCESS_EE_MODEWRITES; - _deleteMMXreg(MMX_GPR+_Rd_, 2); - xmmregs[mmreg1].inuse = 1; - xmmregs[mmreg1].reg = _Rd_; - xmmregs[mmreg1].mode = moded; - mmreg3 = mmreg1; - } - else if( mmreg3 < 0 ) mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); - - info |= PROCESS_EE_SET_D(mmreg3); - } - - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); - } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); - } - - if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1)); - else consttcode(info|PROCESS_EE_SET_S(mmreg1)); - _clearNeededXMMregs(); - if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); - return; - } - } - else { - // no const regs - mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); - mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); - - if( mmreg1 >= 0 || mmreg2 >= 0 ) { - int info = PROCESS_EE_XMM; - - // do it all in xmm - if( mmreg1 < 0 ) mmreg1 = 
_allocGPRtoXMMreg(-1, _Rs_, MODE_READ); - if( mmreg2 < 0 ) mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); - - info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2); - - if( xmminfo & XMMINFO_WRITED ) { - // check for last used, if so don't alloc a new XMM reg - _addNeededGPRtoXMMreg(_Rd_); - mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded); - - if( mmreg3 < 0 ) { - if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { - _freeXMMreg(mmreg2); - info &= ~PROCESS_EE_MODEWRITET; - _deleteMMXreg(MMX_GPR+_Rd_, 2); - xmmregs[mmreg2].inuse = 1; - xmmregs[mmreg2].reg = _Rd_; - xmmregs[mmreg2].mode = moded; - mmreg3 = mmreg2; - } - else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { - _freeXMMreg(mmreg1); - info &= ~PROCESS_EE_MODEWRITES; - _deleteMMXreg(MMX_GPR+_Rd_, 2); - xmmregs[mmreg1].inuse = 1; - xmmregs[mmreg1].reg = _Rd_; - xmmregs[mmreg1].mode = moded; - mmreg3 = mmreg1; - } - else mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); - } - - info |= PROCESS_EE_SET_D(mmreg3); - } - - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); - } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); - if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); - } - - noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); - _clearNeededXMMregs(); - if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); - return; - } - } - - _clearNeededXMMregs(); - } - - // regular x86 - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 1); - if( xmminfo&XMMINFO_WRITED ) - _deleteGPRtoXMMreg(_Rd_, (xmminfo&XMMINFO_READD)?0:2); - _deleteMMXreg(MMX_GPR+_Rs_, 1); - 
_deleteMMXreg(MMX_GPR+_Rt_, 1); - if( xmminfo&XMMINFO_WRITED ) - _deleteMMXreg(MMX_GPR+_Rd_, (xmminfo&XMMINFO_READD)?0:2); - - // don't delete, fn will take care of them -// if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { -// _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo&XMMINFO_READLO)?1:0); -// _deleteMMXreg(MMX_GPR+MMX_LO, (xmminfo&XMMINFO_READLO)?1:0); -// } -// if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { -// _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo&XMMINFO_READHI)?1:0); -// _deleteMMXreg(MMX_GPR+MMX_HI, (xmminfo&XMMINFO_READHI)?1:0); -// } - - if( GPR_IS_CONST1(_Rs_) ) { - constscode(0); - if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); - return; - } - - if( GPR_IS_CONST1(_Rt_) ) { - consttcode(0); - if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); - return; - } - - noconstcode(0); - if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); -} - -// rt = rs op imm16 -void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) -{ - int mmreg1, mmreg2; - if ( ! _Rt_ ) return; - - CHECK_SAVE_REG(_Rt_); - _eeProcessHasLive(_Rt_, 0); - EEINST_RESETSIGNEXT(_Rt_); - - if( GPR_IS_CONST1(_Rs_) ) { - _deleteMMXreg(MMX_GPR+_Rt_, 2); - _deleteGPRtoXMMreg(_Rt_, 2); - GPR_SET_CONST(_Rt_); - constcode(); - return; - } - - // test if should write mmx - if( g_pCurInstInfo->info & EEINST_MMX ) { - - // no const regs - mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rs_, MODE_READ); - - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_MMX|PROCESS_EE_SETMODES(mmreg1); - - // check for last used, if so don't alloc a new MMX reg - _addNeededMMXreg(MMX_GPR+_Rt_); - mmreg2 = _checkMMXreg(MMX_GPR+_Rt_, MODE_WRITE); - - if( mmreg2 < 0 ) { - if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rs_) ) { - if( EEINST_ISLIVE64(_Rs_) ) { - _freeMMXreg(mmreg1); - info &= ~PROCESS_EE_MODEWRITES; - } - _deleteGPRtoXMMreg(_Rt_, 2); - mmxregs[mmreg1].inuse = 1; - mmxregs[mmreg1].reg = _Rt_; - mmxregs[mmreg1].mode = MODE_WRITE|MODE_READ; - mmreg2 = mmreg1; - } - else mmreg2 = 
_allocMMXreg(-1, MMX_GPR+_Rt_, MODE_WRITE); - } - - SetMMXstate(); - noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); - _clearNeededMMXregs(); - GPR_DEL_CONST(_Rt_); - return; - } - - _clearNeededMMXregs(); - } - - // test if should write xmm, mirror to mmx code - if( g_pCurInstInfo->info & EEINST_XMM ) { - - // no const regs - mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); - - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_XMM|PROCESS_EE_SETMODES(mmreg1); - - // check for last used, if so don't alloc a new XMM reg - _addNeededGPRtoXMMreg(_Rt_); - mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE); - - if( mmreg2 < 0 ) { - if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { - _freeXMMreg(mmreg1); - info &= ~PROCESS_EE_MODEWRITES; - _deleteMMXreg(MMX_GPR+_Rt_, 2); - xmmregs[mmreg1].inuse = 1; - xmmregs[mmreg1].reg = _Rt_; - xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ; - mmreg2 = mmreg1; - } - else mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE); - } - - noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); - _clearNeededXMMregs(); - GPR_DEL_CONST(_Rt_); - return; - } - - _clearNeededXMMregs(); - } - - // regular x86 - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 2); - _deleteMMXreg(MMX_GPR+_Rs_, 1); - _deleteMMXreg(MMX_GPR+_Rt_, 2); - - noconstcode(0); - GPR_DEL_CONST(_Rt_); -} - -// rd = rt op sa -void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) -{ - int mmreg1, mmreg2; - if ( ! 
_Rd_ ) return; - - CHECK_SAVE_REG(_Rd_); - _eeProcessHasLive(_Rd_, 0); - EEINST_RESETSIGNEXT(_Rd_); - - if( GPR_IS_CONST1(_Rt_) ) { - _deleteMMXreg(MMX_GPR+_Rd_, 2); - _deleteGPRtoXMMreg(_Rd_, 2); - GPR_SET_CONST(_Rd_); - constcode(); - return; - } - - // test if should write mmx - if( g_pCurInstInfo->info & EEINST_MMX ) { - - // no const regs - mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_READ); - - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_MMX|PROCESS_EE_SETMODET(mmreg1); - - // check for last used, if so don't alloc a new MMX reg - _addNeededMMXreg(MMX_GPR+_Rd_); - mmreg2 = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE); - - if( mmreg2 < 0 ) { - if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_) ) { - if( EEINST_ISLIVE64(_Rt_) ) { - _freeMMXreg(mmreg1); - info &= ~PROCESS_EE_MODEWRITET; - } - _deleteGPRtoXMMreg(_Rd_, 2); - mmxregs[mmreg1].inuse = 1; - mmxregs[mmreg1].reg = _Rd_; - mmxregs[mmreg1].mode = MODE_WRITE|MODE_READ; - mmreg2 = mmreg1; - } - else mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE); - } - - SetMMXstate(); - noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2)); - _clearNeededMMXregs(); - GPR_DEL_CONST(_Rd_); - return; - } - - _clearNeededMMXregs(); - } - - // test if should write xmm, mirror to mmx code - if( g_pCurInstInfo->info & EEINST_XMM ) { - - // no const regs - mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); - - if( mmreg1 >= 0 ) { - int info = PROCESS_EE_XMM|PROCESS_EE_SETMODET(mmreg1); - - // check for last used, if so don't alloc a new XMM reg - _addNeededGPRtoXMMreg(_Rd_); - mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); - - if( mmreg2 < 0 ) { - if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_) ) { - _freeXMMreg(mmreg1); - info &= ~PROCESS_EE_MODEWRITET; - _deleteMMXreg(MMX_GPR+_Rd_, 2); - xmmregs[mmreg1].inuse = 1; - xmmregs[mmreg1].reg = _Rd_; - xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ; - mmreg2 = mmreg1; - } - else mmreg2 = 
_allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); - } - - noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2)); - _clearNeededXMMregs(); - GPR_DEL_CONST(_Rd_); - return; - } - - _clearNeededXMMregs(); - } - - // regular x86 - _deleteGPRtoXMMreg(_Rt_, 1); - _deleteGPRtoXMMreg(_Rd_, 2); - _deleteMMXreg(MMX_GPR+_Rt_, 1); - _deleteMMXreg(MMX_GPR+_Rd_, 2); - - noconstcode(0); - GPR_DEL_CONST(_Rd_); -} - -// rt op rs -void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode) -{ - assert(0); - // for now, don't support xmm - _deleteEEreg(_Rs_, 1); - _deleteEEreg(_Rt_, 1); - - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { - constcode(); - return; - } - - if( GPR_IS_CONST1(_Rs_) ) { - //multicode(PROCESS_EE_CONSTT); - return; - } - - if( GPR_IS_CONST1(_Rt_) ) { - //multicode(PROCESS_EE_CONSTT); - return; - } - - multicode(0); -} - -// Simple Code Templates // - -// rd = rs op rt -void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode) -{ - if ( ! _Rd_ ) return; - - // for now, don't support xmm - CHECK_SAVE_REG(_Rd_); - - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 1); - _deleteGPRtoXMMreg(_Rd_, 0); - _deleteMMXreg(MMX_GPR+_Rs_, 1); - _deleteMMXreg(MMX_GPR+_Rt_, 1); - _deleteMMXreg(MMX_GPR+_Rd_, 0); - - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { - GPR_SET_CONST(_Rd_); - constcode(); - return; - } - - if( GPR_IS_CONST1(_Rs_) ) { - constscode(0); - GPR_DEL_CONST(_Rd_); - return; - } - - if( GPR_IS_CONST1(_Rt_) ) { - consttcode(0); - GPR_DEL_CONST(_Rd_); - return; - } - - noconstcode(0); - GPR_DEL_CONST(_Rd_); -} - -// rt = rs op imm16 -void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) -{ - if ( ! 
_Rt_ ) - return; - - // for now, don't support xmm - CHECK_SAVE_REG(_Rt_); - - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 0); - - if( GPR_IS_CONST1(_Rs_) ) { - GPR_SET_CONST(_Rt_); - constcode(); - return; - } - - noconstcode(0); - GPR_DEL_CONST(_Rt_); -} - -// rd = rt op sa -void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) -{ - if ( ! _Rd_ ) return; - - // for now, don't support xmm - CHECK_SAVE_REG(_Rd_); - - _deleteGPRtoXMMreg(_Rt_, 1); - _deleteGPRtoXMMreg(_Rd_, 0); - - if( GPR_IS_CONST1(_Rt_) ) { - GPR_SET_CONST(_Rd_); - constcode(); - return; - } - - noconstcode(0); - GPR_DEL_CONST(_Rd_); -} - -// rd = rt MULT rs (SPECIAL) -void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT) -{ - assert(0); - // for now, don't support xmm - if( MULT ) { - CHECK_SAVE_REG(_Rd_); - _deleteGPRtoXMMreg(_Rd_, 0); - } - - _deleteGPRtoXMMreg(_Rs_, 1); - _deleteGPRtoXMMreg(_Rt_, 1); - - if( GPR_IS_CONST2(_Rs_, _Rt_) ) { - if( MULT && _Rd_ ) GPR_SET_CONST(_Rd_); - constcode(); - return; - } - - if( GPR_IS_CONST1(_Rs_) ) { - //multicode(PROCESS_EE_CONSTS); - if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); - return; - } - - if( GPR_IS_CONST1(_Rt_) ) { - //multicode(PROCESS_EE_CONSTT); - if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); - return; - } - - multicode(0); - if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); -} - -// EE XMM allocation code -int eeRecompileCodeXMM(int xmminfo) -{ - int info = PROCESS_EE_XMM; - - // save state - if( xmminfo & XMMINFO_WRITED ) { - CHECK_SAVE_REG(_Rd_); - _eeProcessHasLive(_Rd_, 0); - EEINST_RESETSIGNEXT(_Rd_); - } - - // flush consts - if( xmminfo & XMMINFO_READT ) { - if( GPR_IS_CONST1( _Rt_ ) && !(g_cpuFlushedConstReg&(1<<_Rt_)) ) { - MOV32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], g_cpuConstRegs[_Rt_].UL[0]); - MOV32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], g_cpuConstRegs[_Rt_].UL[1]); - g_cpuFlushedConstReg |= (1<<_Rt_); - } - } - if( xmminfo & XMMINFO_READS) { - if( GPR_IS_CONST1( _Rs_ ) && 
!(g_cpuFlushedConstReg&(1<<_Rs_)) ) { - MOV32ItoM((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ], g_cpuConstRegs[_Rs_].UL[0]); - MOV32ItoM((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ], g_cpuConstRegs[_Rs_].UL[1]); - g_cpuFlushedConstReg |= (1<<_Rs_); - } - } - - if( xmminfo & XMMINFO_WRITED ) { - GPR_DEL_CONST(_Rd_); - } - - // add needed - if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - _addNeededGPRtoXMMreg(XMMGPR_LO); - } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - _addNeededGPRtoXMMreg(XMMGPR_HI); - } - if( xmminfo & XMMINFO_READS) _addNeededGPRtoXMMreg(_Rs_); - if( xmminfo & XMMINFO_READT) _addNeededGPRtoXMMreg(_Rt_); - if( xmminfo & XMMINFO_WRITED ) _addNeededGPRtoXMMreg(_Rd_); - - // allocate - if( xmminfo & XMMINFO_READS) { - int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); - info |= PROCESS_EE_SET_S(reg)|PROCESS_EE_SETMODES(reg); - } - if( xmminfo & XMMINFO_READT) { - int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); - info |= PROCESS_EE_SET_T(reg)|PROCESS_EE_SETMODET(reg); - } - - if( xmminfo & XMMINFO_WRITED ) { - int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?((xmminfo&XMMINFO_READD_LO)?(MODE_READ|MODE_READHALF):MODE_READ):0); - - int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd); - - if( regd < 0 ) { - if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { - _freeXMMreg(EEREC_T); - _deleteMMXreg(MMX_GPR+_Rd_, 2); - xmmregs[EEREC_T].inuse = 1; - xmmregs[EEREC_T].reg = _Rd_; - xmmregs[EEREC_T].mode = readd; - regd = EEREC_T; - } - else if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { - _freeXMMreg(EEREC_S); - _deleteMMXreg(MMX_GPR+_Rd_, 2); - xmmregs[EEREC_S].inuse = 1; - xmmregs[EEREC_S].reg = _Rd_; - xmmregs[EEREC_S].mode = readd; - regd = EEREC_S; - } - else regd = _allocGPRtoXMMreg(-1, _Rd_, readd); - } - - info |= PROCESS_EE_SET_D(regd); - } - if( 
xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { - info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0))); - info |= PROCESS_EE_LO; - } - if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { - info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0))); - info |= PROCESS_EE_HI; - } - return info; -} - -// EE COP1(FPU) XMM allocation code -#define _Ft_ _Rt_ -#define _Fs_ _Rd_ -#define _Fd_ _Sa_ - -#define PROCESS_EE_SETMODES_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITES:0) -#define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0) - -// rd = rs op rt -void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xmminfo) -{ - int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1; - - if( EE_FPU_REGCACHING && cpucaps.hasStreamingSIMDExtensions ) { - int info = PROCESS_EE_XMM; - - if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_); - if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_); - if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_); - if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg(); - - if( xmminfo & XMMINFO_READT ) { - if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); - else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); - } - - if( xmminfo & XMMINFO_READS ) { - if( (!(xmminfo&XMMINFO_READT)||mmregt>=0) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) - mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); - else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); - } - - if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs); - if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt); - - if( xmminfo & XMMINFO_READD ) { - assert( xmminfo & XMMINFO_WRITED ); - mmregd = _allocFPtoXMMreg(-1, _Fd_, 
MODE_READ); - } - - if( xmminfo & XMMINFO_READACC ) { - if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) ) - mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ); - else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); - } - - if( xmminfo & XMMINFO_WRITEACC ) { - - // check for last used, if so don't alloc a new XMM reg - int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0); - - mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc); - - if( mmregacc < 0 ) { - if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { - if( FPUINST_ISLIVE(_Ft_) ) { - _freeXMMreg(mmregt); - info &= ~PROCESS_EE_MODEWRITET; - } - _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); - xmmregs[mmregt].inuse = 1; - xmmregs[mmregt].reg = 0; - xmmregs[mmregt].mode = readacc; - xmmregs[mmregt].type = XMMTYPE_FPACC; - mmregacc = mmregt; - } - else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { - if( FPUINST_ISLIVE(_Fs_) ) { - _freeXMMreg(mmregs); - info &= ~PROCESS_EE_MODEWRITES; - } - _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); - xmmregs[mmregs].inuse = 1; - xmmregs[mmregs].reg = 0; - xmmregs[mmregs].mode = readacc; - xmmregs[mmregs].type = XMMTYPE_FPACC; - mmregacc = mmregs; - } - else mmregacc = _allocFPACCtoXMMreg(-1, readacc); - } - - xmmregs[mmregacc].mode |= MODE_WRITE; - } - else if( xmminfo & XMMINFO_WRITED ) { - // check for last used, if so don't alloc a new XMM reg - int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); - if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); - else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); - - if( mmregd < 0 ) { - if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { - if( FPUINST_ISLIVE(_Ft_) ) { - _freeXMMreg(mmregt); - info &= ~PROCESS_EE_MODEWRITET; - } - _deleteMMXreg(MMX_FPU+_Fd_, 2); - xmmregs[mmregt].inuse = 1; - xmmregs[mmregt].reg = _Fd_; - 
xmmregs[mmregt].mode = readd; - mmregd = mmregt; - } - else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { - if( FPUINST_ISLIVE(_Fs_) ) { - _freeXMMreg(mmregs); - info &= ~PROCESS_EE_MODEWRITES; - } - _deleteMMXreg(MMX_FPU+_Fd_, 2); - xmmregs[mmregs].inuse = 1; - xmmregs[mmregs].reg = _Fd_; - xmmregs[mmregs].mode = readd; - mmregd = mmregs; - } - else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) { - if( FPUINST_ISLIVE(XMMFPU_ACC) ) - _freeXMMreg(mmregacc); - _deleteMMXreg(MMX_FPU+_Fd_, 2); - xmmregs[mmregacc].inuse = 1; - xmmregs[mmregacc].reg = _Fd_; - xmmregs[mmregacc].mode = readd; - xmmregs[mmregacc].type = XMMTYPE_FPREG; - mmregd = mmregacc; - } - else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); - } - } - - assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 ); - - if( xmminfo & XMMINFO_WRITED ) { - assert( mmregd >= 0 ); - info |= PROCESS_EE_SET_D(mmregd); - } - if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) { - if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC; - else assert( !(xmminfo&XMMINFO_WRITEACC)); - } - - if( xmminfo & XMMINFO_READS ) { - if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S; - } - if( xmminfo & XMMINFO_READT ) { - if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T; - } - - // at least one must be in xmm - if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) { - assert( mmregs >= 0 || mmregt >= 0 ); - } - - xmmcode(info); - _clearNeededXMMregs(); - return; - } - - if( xmminfo & XMMINFO_READS ) _deleteFPtoXMMreg(_Fs_, 0); - if( xmminfo & XMMINFO_READT ) _deleteFPtoXMMreg(_Ft_, 0); - if( xmminfo & (XMMINFO_READD|XMMINFO_WRITED) ) _deleteFPtoXMMreg(_Fd_, 0); - if( xmminfo & (XMMINFO_READACC|XMMINFO_WRITEACC) ) _deleteFPtoXMMreg(XMMFPU_ACC, 0); - fpucode(0); -} - -#undef _Ft_ -#undef _Fs_ -#undef _Fd_ - 
-//////////////////////////////////////////////////// -extern u8 g_MACFlagTransform[256]; // for vus - -u32 g_sseMXCSR = 0x9fc0; // disable all exception, round to 0, flush to 0 -u32 g_sseVUMXCSR = 0xff80; - -void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) -{ - // SSE STATE // - // WARNING: do not touch unless you know what you are doing - - if( cpucaps.hasStreamingSIMDExtensions ) { - g_sseMXCSR = sseMXCSR; - g_sseVUMXCSR = sseVUMXCSR; - // do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up) - // Update 11/05/08 - Doesnt seem to effect it anymore, for the speed boost, its on :p - //g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III! - // changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo - // so... grandia III wins (you can change individual games with the 'roundmode' patch command) - -#ifdef _MSC_VER - __asm ldmxcsr g_sseMXCSR; // set the new sse control -#else - __asm__("ldmxcsr %0" : : "m"(g_sseMXCSR) ); -#endif - //g_sseVUMXCSR = g_sseMXCSR|0x6000; - } -} - -#define REC_CACHEMEM 0x01000000 - -int recInit( void ) -{ - int i; - const u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; - - recLUT = (uptr*) _aligned_malloc( 0x010000 * sizeof(uptr), 16 ); - memset( recLUT, 0, 0x010000 * sizeof(uptr) ); - - // can't have upper 4 bits nonzero! 
- recMem = (char*)SysMmap(0x0d000000, REC_CACHEMEM); - - // 32 alignment necessary - recRAM = (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x02000000 , 4*sizeof(BASEBLOCK)); - recROM = (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x00400000 , 4*sizeof(BASEBLOCK)); - recROM1= (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x00040000 , 4*sizeof(BASEBLOCK)); - recBlocks = (BASEBLOCKEX*) _aligned_malloc( sizeof(BASEBLOCKEX)*EE_NUMBLOCKS, 16); - recStack = (char*)malloc( RECSTACK_SIZE ); - - s_nInstCacheSize = 128; - s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize ); - - if ( recBlocks == NULL || recRAM == NULL || recROM == NULL || recROM1 == NULL || recMem == NULL || recLUT == NULL ) { - SysMessage( _( "Error allocating memory" ) ); - return -1; - } - - for ( i = 0x0000; i < 0x0200; i++ ) - { - recLUT[ i + 0x0000 ] = (uptr)&recRAM[ i << 14 ]; - recLUT[ i + 0x2000 ] = (uptr)&recRAM[ i << 14 ]; - recLUT[ i + 0x3000 ] = (uptr)&recRAM[ i << 14 ]; - } - - for ( i = 0x0000; i < 0x0040; i++ ) - { - recLUT[ i + 0x1fc0 ] = (uptr)&recROM[ i << 14 ]; - recLUT[ i + 0x9fc0 ] = (uptr)&recROM[ i << 14 ]; - recLUT[ i + 0xbfc0 ] = (uptr)&recROM[ i << 14 ]; - } - - for ( i = 0x0000; i < 0x0004; i++ ) - { - recLUT[ i + 0x1e00 ] = (uptr)&recROM1[ i << 14 ]; - recLUT[ i + 0x9e00 ] = (uptr)&recROM1[ i << 14 ]; - recLUT[ i + 0xbe00 ] = (uptr)&recROM1[ i << 14 ]; - } - - memcpy( recLUT + 0x8000, recLUT, 0x2000 * sizeof(uptr) ); - memcpy( recLUT + 0xa000, recLUT, 0x2000 * sizeof(uptr) ); - - memset(recMem, 0xcd, REC_CACHEMEM); - memset(recStack, 0, RECSTACK_SIZE); - - // SSE3 detection, manually create the code - x86SetPtr(recMem); - SSE3_MOVSLDUP_XMM_to_XMM(XMM0, XMM0); - RET(); - - cpudetectSSE3(recMem); - - SysPrintf( "x86Init: \n" ); - SysPrintf( "\tCPU vender name = %s\n", cpuinfo.x86ID ); - SysPrintf( "\tFamilyID = %x\n", cpuinfo.x86StepID ); - SysPrintf( "\tx86Family = %s\n", cpuinfo.x86Fam ); - SysPrintf( "\tCPU speed = %d.%03d Ghz\n", cpuinfo.cpuspeed / 
1000, cpuinfo.cpuspeed%1000); - SysPrintf( "\tx86PType = %s\n", cpuinfo.x86Type ); - SysPrintf( "\tx86Flags = %8.8x\n", cpuinfo.x86Flags ); - SysPrintf( "\tx86EFlags = %8.8x\n", cpuinfo.x86EFlags ); - SysPrintf( "Features: \n" ); - SysPrintf( "\t%sDetected MMX\n", cpucaps.hasMultimediaExtensions ? "" : "Not " ); - SysPrintf( "\t%sDetected SSE\n", cpucaps.hasStreamingSIMDExtensions ? "" : "Not " ); - SysPrintf( "\t%sDetected SSE2\n", cpucaps.hasStreamingSIMD2Extensions ? "" : "Not " ); - SysPrintf( "\t%sDetected SSE3\n", cpucaps.hasStreamingSIMD3Extensions ? "" : "Not " ); - - if ( cpuinfo.x86ID[0] == 'A' ) //AMD cpu - { - SysPrintf( " Extented AMD Features: \n" ); - SysPrintf( "\t%sDetected MMX2\n", cpucaps.hasMultimediaExtensionsExt ? "" : "Not " ); - SysPrintf( "\t%sDetected 3DNOW\n", cpucaps.has3DNOWInstructionExtensions ? "" : "Not " ); - SysPrintf( "\t%sDetected 3DNOW2\n", cpucaps.has3DNOWInstructionExtensionsExt ? "" : "Not " ); - } - if ( !( cpucaps.hasMultimediaExtensions ) ) - { - SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) ); - return -1; - } - - x86FpuState = FPU_STATE; - - SuperVUInit(-1); - - for(i = 0; i < 256; ++i) { - g_MACFlagTransform[i] = macarr[i>>4]|(macarr[i&15]<<4); - } - - SetCPUState(g_sseMXCSR, g_sseVUMXCSR); - - return 0; -} - -//////////////////////////////////////////////////// -void recReset( void ) { -#ifdef PCSX2_DEVBUILD - SysPrintf("EE Recompiler data reset\n"); -#endif - - s_nNextBlock = 0; - maxrecmem = 0; - memset( recRAM, 0, sizeof(BASEBLOCK)/4*0x02000000 ); - memset( recROM, 0, sizeof(BASEBLOCK)/4*0x00400000 ); - memset( recROM1, 0, sizeof(BASEBLOCK)/4*0x00040000 ); - memset( recBlocks, 0, sizeof(BASEBLOCKEX)*EE_NUMBLOCKS ); - if( s_pInstCache ) memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize ); - ResetBaseBlockEx(0); - -#ifdef _MSC_VER - __asm emms; -#else - __asm__("emms"); -#endif - -#ifdef _DEBUG - // don't clear since save states won't work - //memset(recMem, 0xcd, 
REC_CACHEMEM); -#endif - - recPtr = recMem; - recStackPtr = recStack; - x86FpuState = FPU_STATE; - iCWstate = 0; - - branch = 0; -} - -void recShutdown( void ) -{ - if ( recMem == NULL ) { - return; - } - - _aligned_free( recLUT ); - SysMunmap((uptr)recMem, REC_CACHEMEM); recMem = NULL; - _aligned_free( recRAM ); recRAM = NULL; - _aligned_free( recROM ); recROM = NULL; - _aligned_free( recROM1 ); recROM1 = NULL; - _aligned_free( recBlocks ); recBlocks = NULL; - free( s_pInstCache ); s_pInstCache = NULL; s_nInstCacheSize = 0; - - SuperVUDestroy(-1); - - x86Shutdown( ); -} - -void recEnableVU0micro(int enable) { -} - -void recEnableVU1micro(int enable) { -} - -#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code -static u32 s_uSaveESP = 0, s_uSaveEBP; - -static void execute( void ) -{ -#ifdef _DEBUG - u8* fnptr; - u32 oldesi; -#else - R5900FNPTR pfn; -#endif - BASEBLOCK* pblock = PC_GETBLOCK(cpuRegs.pc); - - if ( !pblock->pFnptr || pblock->startpc != cpuRegs.pc ) { - recRecompile(cpuRegs.pc); - } - - assert( pblock->pFnptr != 0 ); - g_EEFreezeRegs = 1; - - // skip the POPs -#ifdef _DEBUG - fnptr = (u8*)pblock->pFnptr; - -#ifdef _MSC_VER - __asm { - // save data - mov oldesi, esi - mov s_uSaveESP, esp - sub s_uSaveESP, 8 - mov s_uSaveEBP, ebp - push ebp - - call fnptr // jump into function - // restore data - pop ebp - mov esi, oldesi - } -#else - - __asm__("movl %%esi, %0\n" - "movl %%esp, %1\n" - "sub $8, %1\n" - "push %%ebp\n" - "call *%2\n" - "pop %%ebp\n" - "movl %0, %%esi\n" : "=m"(oldesi), "=m"(s_uSaveESP) : "c"(fnptr) ); -#endif // _MSC_VER - -#else - -#ifdef _MSC_VER - pfn = ((R5900FNPTR)pblock->pFnptr); - // use call instead of pfn() - __asm call pfn; -#else - ((R5900FNPTR)pblock->pFnptr)(); -#endif - -#endif - - g_EEFreezeRegs = 0; -} - -void recStep( void ) { -} - -void recExecute( void ) { - //SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); - //SetPriorityClass(GetCurrentProcess(), 
HIGH_PRIORITY_CLASS);//ABOVE_NORMAL_PRIORITY_CLASS); - //SetThreadAffinityMask(GetCurrentThread(), 0); - if( Config.Options & PCSX2_EEREC ) Config.Options |= PCSX2_COP2REC; - - for (;;) - execute(); -} - -void recExecuteBlock( void ) { - execute(); -} - -//////////////////////////////////////////////////// -extern u32 g_nextBranchCycle; - -u32 g_lastpc = 0; -u32 g_EEDispatchTemp; -u32 s_pCurBlock_ltime; - -#ifdef _MSC_VER - -// jumped to when invalid pc address -__declspec(naked,noreturn) void Dispatcher() -{ - // EDX contains the jump addr to modify - __asm push edx - - // calc PC_GETBLOCK - s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); - - __asm { - mov eax, s_pDispatchBlock - - // check if startpc == cpuRegs.pc - mov ecx, cpuRegs.pc - //and ecx, 0x5fffffff // remove higher bits - cmp ecx, dword ptr [eax+BLOCKTYPE_STARTPC] - je CheckPtr - - // recompile - push cpuRegs.pc // pc - call recRecompile - add esp, 4 // pop old param - mov eax, s_pDispatchBlock -CheckPtr: - mov eax, dword ptr [eax] - } - -#ifdef _DEBUG - __asm mov g_EEDispatchTemp, eax - assert( g_EEDispatchTemp ); -#endif - -// __asm { -// test eax, 0x40000000 // BLOCKTYPE_NEEDCLEAR -// jz Done -// // move new pc -// and eax, 0x0fffffff -// mov ecx, cpuRegs.pc -// mov dword ptr [eax+1], ecx -// } - __asm { - and eax, 0x0fffffff - mov edx, eax - pop ecx // x86Ptr to mod - sub edx, ecx - sub edx, 4 - mov dword ptr [ecx], edx - - jmp eax - } -} - -__declspec(naked,noreturn) void DispatcherClear() -{ - // EDX contains the current pc - __asm mov cpuRegs.pc, edx - __asm push edx - - // calc PC_GETBLOCK - s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); - - if( s_pDispatchBlock->startpc == cpuRegs.pc ) { - assert( s_pDispatchBlock->pFnptr != 0 ); - - // already modded the code, jump to the new place - __asm { - pop edx - add esp, 4 // ignore stack - mov eax, s_pDispatchBlock - mov eax, dword ptr [eax] - and eax, 0x0fffffff - jmp eax - } - } - - __asm { - call recRecompile - add esp, 4 // pop old param - mov eax, 
s_pDispatchBlock - mov eax, dword ptr [eax] - - pop ecx // old fnptr - - and eax, 0x0fffffff - mov byte ptr [ecx], 0xe9 // jmp32 - mov edx, eax - sub edx, ecx - sub edx, 5 - mov dword ptr [ecx+1], edx - - jmp eax - } -} - -// called when jumping to variable pc address -__declspec(naked,noreturn) void DispatcherReg() -{ - __asm { - //s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); - mov edx, cpuRegs.pc - mov ecx, edx - } - - __asm { - shr edx, 14 - and edx, 0xfffffffc - add edx, recLUT - mov edx, dword ptr [edx] - - mov eax, ecx - and eax, 0xfffc - // edx += 2*eax - shl eax, 1 - add edx, eax - - // check if startpc == cpuRegs.pc - mov eax, ecx - //and eax, 0x5fffffff // remove higher bits - cmp eax, dword ptr [edx+BLOCKTYPE_STARTPC] - jne recomp - - mov eax, dword ptr [edx] - } - -#ifdef _DEBUG - __asm mov g_EEDispatchTemp, eax - assert( g_EEDispatchTemp ); -#endif - - __asm { - and eax, 0x0fffffff - jmp eax // fnptr - -recomp: - sub esp, 8 - mov dword ptr [esp+4], edx - mov dword ptr [esp], ecx - call recRecompile - mov edx, dword ptr [esp+4] - add esp, 8 - - mov eax, dword ptr [edx] - and eax, 0x0fffffff - jmp eax // fnptr - } -} - -#ifdef PCSX2_DEVBUILD -__declspec(naked) void _StartPerfCounter() -{ - __asm { - push eax - push ebx - push ecx - - rdtsc - mov dword ptr [offset lbase], eax - mov dword ptr [offset lbase + 4], edx - - pop ecx - pop ebx - pop eax - ret - } -} - -__declspec(naked) void _StopPerfCounter() -{ - __asm { - push eax - push ebx - push ecx - - rdtsc - - sub eax, dword ptr [offset lbase] - sbb edx, dword ptr [offset lbase + 4] - mov ecx, s_pCurBlock_ltime - add eax, dword ptr [ecx] - adc edx, dword ptr [ecx + 4] - mov dword ptr [ecx], eax - mov dword ptr [ecx + 4], edx - pop ecx - pop ebx - pop eax - ret - } -} - -#endif // PCSX2_DEVBUILD - -#else // _MSC_VER - -extern void Dispatcher(); -extern void DispatcherClear(); -extern void DispatcherReg(); -extern void _StartPerfCounter(); -extern void _StopPerfCounter(); - -#endif - -#ifdef PCSX2_DEVBUILD 
-void StartPerfCounter() -{ -#ifdef PCSX2_DEVBUILD - if( s_startcount ) { - CALLFunc((u32)_StartPerfCounter); - } -#endif -} - -void StopPerfCounter() -{ -#ifdef PCSX2_DEVBUILD - if( s_startcount ) { - MOV32ItoM((u32)&s_pCurBlock_ltime, (u32)&s_pCurBlockEx->ltime); - CALLFunc((u32)_StopPerfCounter); - } -#endif -} -#endif - -//////////////////////////////////////////////////// -void recClear64(BASEBLOCK* p) -{ - int left = 4 - ((u32)p % 16)/sizeof(BASEBLOCK); - recClearMem(p); - - if( left > 1 && *(u32*)(p+1) ) recClearMem(p+1); -} - -void recClear128(BASEBLOCK* p) -{ - int left = 4 - ((u32)p % 32)/sizeof(BASEBLOCK); - recClearMem(p); - - if( left > 1 && *(u32*)(p+1) ) recClearMem(p+1); - if( left > 2 && *(u32*)(p+2) ) recClearMem(p+2); - if( left > 3 && *(u32*)(p+3) ) recClearMem(p+3); -} - -void recClear( u32 Addr, u32 Size ) -{ - u32 i; - for(i = 0; i < Size; ++i, Addr+=4) { - REC_CLEARM(Addr); - } -} - -#define EE_MIN_BLOCK_BYTES 15 - -void recClearMem(BASEBLOCK* p) -{ - BASEBLOCKEX* pexblock; - BASEBLOCK* pstart; - int lastdelay; - - // necessary since recompiler doesn't call femms/emms -#ifdef _MSC_VER - if (cpucaps.has3DNOWInstructionExtensions) __asm femms; - else __asm emms; -#else - if( cpucaps.has3DNOWInstructionExtensions )__asm__("femms"); - else __asm__("emms"); -#endif - - assert( p != NULL ); - - if( p->uType & BLOCKTYPE_DELAYSLOT ) { - recClearMem(p-1); - if( p->pFnptr == 0 ) - return; - } - - assert( p->pFnptr != 0 ); - assert( p->startpc ); - - x86Ptr = (s8*)p->pFnptr; - - // there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which - MOV32ItoR(EDX, p->startpc); - PUSH32I((u32)x86Ptr); // will be replaced by JMP32 - JMP32((u32)DispatcherClear - ( (u32)x86Ptr + 5 )); - assert( x86Ptr == (s8*)p->pFnptr + EE_MIN_BLOCK_BYTES ); - - pstart = PC_GETBLOCK(p->startpc); - pexblock = PC_GETBLOCKEX(pstart); - assert( pexblock->startpc == pstart->startpc ); - - if( pexblock->startpc != pstart->startpc ) { - // some bug with ffx 
after beating a big snake in sewers - RemoveBaseBlockEx(pexblock, 0); - pexblock->size = 0; - pexblock->startpc = 0; - return; - } - -// if( pexblock->pOldFnptr ) { -// // have to mod oldfnptr too -// x86Ptr = pexblock->pOldFnptr; -// -// MOV32ItoR(EDX, p->startpc); -// JMP32((u32)DispatcherClear - ( (u32)x86Ptr + 5 )); -// } -// else -// pexblock->pOldFnptr = (u8*)p->pFnptr; - - // don't delete if last is delay - lastdelay = pexblock->size; - if( pstart[pexblock->size-1].uType & BLOCKTYPE_DELAYSLOT ) { - assert( pstart[pexblock->size-1].pFnptr != pstart->pFnptr ); - if( pstart[pexblock->size-1].pFnptr != 0 ) { - pstart[pexblock->size-1].uType = 0; - --lastdelay; - } - } - - memset(pstart, 0, lastdelay*sizeof(BASEBLOCK)); - - RemoveBaseBlockEx(pexblock, 0); - pexblock->size = 0; - pexblock->startpc = 0; -} - -// check for end of bios -void CheckForBIOSEnd() -{ - MOV32MtoR(EAX, (int)&cpuRegs.pc); - - CMP32ItoR(EAX, 0x00200008); - j8Ptr[0] = JE8(0); - - CMP32ItoR(EAX, 0x00100008); - j8Ptr[1] = JE8(0); - - // return - j8Ptr[2] = JMP8(0); - - x86SetJ8( j8Ptr[0] ); - x86SetJ8( j8Ptr[1] ); - - // bios end - RET2(); - - x86SetJ8( j8Ptr[2] ); -} - -static int *s_pCode; - -void SetBranchReg( u32 reg ) -{ - branch = 1; - - if( reg != 0xffffffff ) { -// if( GPR_IS_CONST1(reg) ) -// MOV32ItoM( (u32)&cpuRegs.pc, g_cpuConstRegs[reg].UL[0] ); -// else { -// int mmreg; -// -// if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0 ) { -// SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.pc, mmreg); -// } -// else if( (mmreg = _checkMMXreg(MMX_GPR+reg, MODE_READ)) >= 0 ) { -// MOVDMMXtoM((u32)&cpuRegs.pc, mmreg); -// SetMMXstate(); -// } -// else { -// MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ reg ].UL[ 0 ] ); -// MOV32RtoM((u32)&cpuRegs.pc, EAX); -// } -// } - _allocX86reg(ESI, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); - _eeMoveGPRtoR(ESI, reg); - - recompileNextInstruction(1); - - if( x86regs[ESI].inuse ) { - assert( x86regs[ESI].type == X86TYPE_PCWRITEBACK ); - MOV32RtoM((int)&cpuRegs.pc, ESI); 
- x86regs[ESI].inuse = 0; - } - else { - MOV32MtoR(EAX, (u32)&g_recWriteback); - MOV32RtoM((int)&cpuRegs.pc, EAX); - } - } - -// CMP32ItoM((u32)&cpuRegs.pc, 0); -// j8Ptr[5] = JNE8(0); -// CALLFunc((u32)tempfn); -// x86SetJ8( j8Ptr[5] ); - - iFlushCall(FLUSH_EVERYTHING); - - iBranchTest(0xffffffff, 1); - if( bExecBIOS ) CheckForBIOSEnd(); - - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); -} - -void SetBranchImm( u32 imm ) -{ - u32* ptr; - branch = 1; - - assert( imm ); - - // end the current block - MOV32ItoM( (u32)&cpuRegs.pc, imm ); - iFlushCall(FLUSH_EVERYTHING); - - iBranchTest(imm, imm <= pc); - if( bExecBIOS ) CheckForBIOSEnd(); - - MOV32ItoR(EDX, 0); - ptr = (u32*)(x86Ptr-4); - *ptr = (u32)JMP32((u32)Dispatcher - ( (u32)x86Ptr + 5 )); -} - -void SaveBranchState() -{ - s_savex86FpuState = x86FpuState; - s_saveiCWstate = iCWstate; - s_savenBlockCycles = s_nBlockCycles; - s_saveConstGPRreg = 0xffffffff; // indicate searching - s_saveHasConstReg = g_cpuHasConstReg; - s_saveFlushedConstReg = g_cpuFlushedConstReg; - s_psaveInstInfo = g_pCurInstInfo; - s_saveRegHasLive1 = g_cpuRegHasLive1; - s_saveRegHasSignExt = g_cpuRegHasSignExt; - - // save all mmx regs - memcpy(s_saveMMXregs, mmxregs, sizeof(mmxregs)); - memcpy(s_saveXMMregs, xmmregs, sizeof(xmmregs)); -} - -void LoadBranchState() -{ - x86FpuState = s_savex86FpuState; - iCWstate = s_saveiCWstate; - s_nBlockCycles = s_savenBlockCycles; - - if( s_saveConstGPRreg != 0xffffffff ) { - assert( s_saveConstGPRreg > 0 ); - - // make sure right GPR was saved - assert( g_cpuHasConstReg == s_saveHasConstReg || (g_cpuHasConstReg ^ s_saveHasConstReg) == (1<visited, 1 ); - } -#endif - -#ifdef _DEBUG - //CALLFunc((u32)testfpu); -#endif - - if( !USE_FAST_BRANCHES || cpuBranch ) { - MOV32MtoR(ECX, (int)&cpuRegs.cycle); - ADD32ItoR(ECX, s_nBlockCycles*EECYCLE_MULT); // NOTE: mulitply cycles here, 6/5 ratio stops pal ffx from randomly crashing, but crashes jakI - MOV32RtoM((int)&cpuRegs.cycle, ECX); // update cycles - } - else 
{ - ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); - return; - } - - SUB32MtoR(ECX, (int)&g_nextBranchCycle); - - // check if should branch - j8Ptr[0] = JS8( 0 ); - - // has to be in the middle of Save/LoadBranchState - CALLFunc( (int)cpuBranchTest ); - - if( newpc != 0xffffffff ) { - CMP32ItoM((int)&cpuRegs.pc, newpc); - JNE32((u32)DispatcherReg - ( (u32)x86Ptr + 6 )); - } - - x86SetJ8( j8Ptr[0] ); -} - - -//////////////////////////////////////////////////// -#ifndef CP2_RECOMPILE - -REC_SYS(COP2); - -#else - -void recCOP2( void ) -{ -#ifdef CPU_LOG - CPU_LOG( "Recompiling COP2:%s\n", disR5900Fasm( cpuRegs.code, cpuRegs.pc ) ); -#endif - - if ( !cpucaps.hasStreamingSIMDExtensions ) { - MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (u32)&cpuRegs.pc, pc ); - iFlushCall(FLUSH_EVERYTHING); - g_cpuHasConstReg = 1; // reset all since COP2 can change regs - CALLFunc( (u32)COP2 ); - - CMP32ItoM((int)&cpuRegs.pc, pc); - j8Ptr[0] = JE8(0); - ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); - x86SetJ8(j8Ptr[0]); - } - else - { - recCOP22( ); - } -} - -#endif - -//////////////////////////////////////////////////// -void recSYSCALL( void ) { - MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (u32)&cpuRegs.pc, pc ); - iFlushCall(FLUSH_NODESTROY); - CALLFunc( (u32)SYSCALL ); - - CMP32ItoM((int)&cpuRegs.pc, pc); - j8Ptr[0] = JE8(0); - ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); - x86SetJ8(j8Ptr[0]); - //branch = 2; -} - -//////////////////////////////////////////////////// -void recBREAK( void ) { - MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); - MOV32ItoM( (u32)&cpuRegs.pc, pc ); - iFlushCall(FLUSH_EVERYTHING); - CALLFunc( (u32)BREAK ); - - CMP32ItoM((int)&cpuRegs.pc, pc); - j8Ptr[0] = JE8(0); - ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); - RET(); - x86SetJ8(j8Ptr[0]); - //branch = 2; -} - 
-//////////////////////////////////////////////////// -//static void recCACHE( void ) { -// MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); -// MOV32ItoM( (u32)&cpuRegs.pc, pc ); -// iFlushCall(FLUSH_EVERYTHING); -// CALLFunc( (u32)CACHE ); -// //branch = 2; -// -// CMP32ItoM((int)&cpuRegs.pc, pc); -// j8Ptr[0] = JE8(0); -// RET(); -// x86SetJ8(j8Ptr[0]); -//} - - -void recPREF( void ) -{ -} - -void recSYNC( void ) -{ -} - -void recMFSA( void ) -{ - int mmreg; - if (!_Rd_) return; - - mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); - if( mmreg >= 0 ) { - SSE_MOVLPS_M64_to_XMM(mmreg, (u32)&cpuRegs.sa); - } - else if( (mmreg = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE)) >= 0 ) { - MOVDMtoMMX(mmreg, (u32)&cpuRegs.sa); - SetMMXstate(); - } - else { - MOV32MtoR(EAX, (u32)&cpuRegs.sa); - _deleteEEreg(_Rd_, 0); - MOV32RtoM((u32)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); - MOV32ItoM((u32)&cpuRegs.GPR.r[_Rd_].UL[1], 0); - } -} - -void recMTSA( void ) -{ - if( GPR_IS_CONST1(_Rs_) ) { - MOV32ItoM((u32)&cpuRegs.sa, g_cpuConstRegs[_Rs_].UL[0] ); - } - else { - int mmreg; - - if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { - SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.sa, mmreg); - } - else if( (mmreg = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ)) >= 0 ) { - MOVDMMXtoM((u32)&cpuRegs.sa, mmreg); - SetMMXstate(); - } - else { - MOV32MtoR(EAX, (u32)&cpuRegs.GPR.r[_Rs_].UL[0]); - MOV32RtoM((u32)&cpuRegs.sa, EAX); - } - } -} - -void recMTSAB( void ) -{ - if( GPR_IS_CONST1(_Rs_) ) { - MOV32ItoM((u32)&cpuRegs.sa, ((g_cpuConstRegs[_Rs_].UL[0] & 0xF) ^ (_Imm_ & 0xF)) << 3); - } - else { - _eeMoveGPRtoR(EAX, _Rs_); - AND32ItoR(EAX, 0xF); - XOR32ItoR(EAX, _Imm_&0xf); - SHL32ItoR(EAX, 3); - MOV32RtoM((u32)&cpuRegs.sa, EAX); - } -} - -void recMTSAH( void ) -{ - if( GPR_IS_CONST1(_Rs_) ) { - MOV32ItoM((u32)&cpuRegs.sa, ((g_cpuConstRegs[_Rs_].UL[0] & 0x7) ^ (_Imm_ & 0x7)) << 4); - } - else { - _eeMoveGPRtoR(EAX, _Rs_); - AND32ItoR(EAX, 0x7); - XOR32ItoR(EAX, _Imm_&0x7); - SHL32ItoR(EAX, 4); - 
MOV32RtoM((u32)&cpuRegs.sa, EAX); - } -} - -static void checkcodefn() -{ - int pctemp; - -#ifdef _MSC_VER - __asm mov pctemp, eax; -#else - __asm__("movl %%eax, %0" : "=m"(pctemp) ); -#endif - - SysPrintf("code changed! %x\n", pctemp); - assert(0); -} - -void checkpchanged(u32 startpc) -{ - assert(0); -} - -//#ifdef _DEBUG -//#define CHECK_XMMCHANGED() CALLFunc((u32)checkxmmchanged); -//#else -//#define CHECK_XMMCHANGED() -//#endif -// -//static void checkxmmchanged() -//{ -// assert( !g_globalMMXSaved ); -// assert( !g_globalXMMSaved ); -//} - -u32 recompileCodeSafe(u32 temppc) -{ - BASEBLOCK* pblock = PC_GETBLOCK(temppc); - - if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { - if( pc == pblock->startpc ) - return 0; - } - - return 1; -} - -void recompileNextInstruction(int delayslot) -{ - static u8 s_bFlushReg = 1; - int i, count; - - BASEBLOCK* pblock = PC_GETBLOCK(pc); - - // need *ppblock != s_pCurBlock because of branches - if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { - - if( !delayslot && pc == pblock->startpc ) { - // code already in place, so jump to it and exit recomp - assert( PC_GETBLOCKEX(pblock)->startpc == pblock->startpc ); - - iFlushCall(FLUSH_EVERYTHING); - MOV32ItoM((u32)&cpuRegs.pc, pc); - -// if( pexblock->pOldFnptr ) { -// // code already in place, so jump to it and exit recomp -// JMP32((u32)pexblock->pOldFnptr - ((u32)x86Ptr + 5)); -// branch = 3; -// return; -// } - - JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5)); - branch = 3; - return; - } - else { - - if( !(delayslot && pblock->startpc == pc) ) { - s8* oldX86 = x86Ptr; - //__Log("clear block %x\n", pblock->startpc); - recClearMem(pblock); - x86Ptr = oldX86; - if( delayslot ) - SysPrintf("delay slot %x\n", pc); - } - } - } - - if( delayslot ) - pblock->uType = BLOCKTYPE_DELAYSLOT; - - s_pCode = (int *)PSM( pc ); - assert(s_pCode); - -#ifdef _DEBUG - MOV32ItoR(EAX, pc); -#endif - - cpuRegs.code = *(int *)s_pCode; - s_nBlockCycles++; - pc += 
4; - -//#ifdef _DEBUG -// CMP32ItoM((u32)s_pCode, cpuRegs.code); -// j8Ptr[0] = JE8(0); -// MOV32ItoR(EAX, pc); -// CALLFunc((u32)checkcodefn); -// x86SetJ8( j8Ptr[ 0 ] ); -// -// if( !delayslot ) { -// CMP32ItoM((u32)&cpuRegs.pc, s_pCurBlockEx->startpc); -// j8Ptr[0] = JB8(0); -// CMP32ItoM((u32)&cpuRegs.pc, pc); -// j8Ptr[1] = JA8(0); -// j8Ptr[2] = JMP8(0); -// x86SetJ8( j8Ptr[ 0 ] ); -// x86SetJ8( j8Ptr[ 1 ] ); -// PUSH32I(s_pCurBlockEx->startpc); -// CALLFunc((u32)checkpchanged); -// ADD32ItoR(ESP, 4); -// x86SetJ8( j8Ptr[ 2 ] ); -// } -//#endif - - g_pCurInstInfo++; - - // reorder register priorities -// for(i = 0; i < X86REGS; ++i) { -// if( x86regs[i].inuse ) { -// if( count > 0 ) mmxregs[i].counter = 1000-count; -// else mmxregs[i].counter = 0; -// } -// } - - for(i = 0; i < MMXREGS; ++i) { - if( mmxregs[i].inuse ) { - assert( MMX_ISGPR(mmxregs[i].reg) ); - count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR); - if( count > 0 ) mmxregs[i].counter = 1000-count; - else mmxregs[i].counter = 0; - } - } - - for(i = 0; i < XMMREGS; ++i) { - if( xmmregs[i].inuse ) { - count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, xmmregs[i].type, xmmregs[i].reg); - if( count > 0 ) xmmregs[i].counter = 1000-count; - else xmmregs[i].counter = 0; - } - } - - // peephole optimizations - if( g_pCurInstInfo->info & EEINSTINFO_COREC ) { - -#ifdef PCSX2_VIRTUAL_MEM - if( g_pCurInstInfo->numpeeps > 1 ) { - switch(cpuRegs.code>>26) { - case 30: recLQ_coX(g_pCurInstInfo->numpeeps); break; - case 31: recSQ_coX(g_pCurInstInfo->numpeeps); break; - case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); break; - case 57: recSWC1_coX(g_pCurInstInfo->numpeeps); break; - case 55: recLD_coX(g_pCurInstInfo->numpeeps); break; - case 63: recSD_coX(g_pCurInstInfo->numpeeps); break; - default: - assert(0); - } - - pc += g_pCurInstInfo->numpeeps*4; - s_nBlockCycles += g_pCurInstInfo->numpeeps; - g_pCurInstInfo += g_pCurInstInfo->numpeeps; - } 
- else { - recBSC_co[cpuRegs.code>>26](); - pc += 4; - s_nBlockCycles++; - g_pCurInstInfo++; - } -#else - assert(0); -#endif - } - else { - assert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); - - // if this instruction is a jump or a branch, exit right away - if( delayslot ) { - switch(cpuRegs.code>>26) { - case 1: - switch(_Rt_) { - case 0: case 1: case 2: case 3: case 0x10: case 0x11: case 0x12: case 0x13: - SysPrintf("branch %x in delay slot!\n", cpuRegs.code); - _clearNeededX86regs(); - _clearNeededMMXregs(); - _clearNeededXMMregs(); - return; - } - break; - - case 2: case 3: case 4: case 5: case 6: case 7: case 0x14: case 0x15: case 0x16: case 0x17: - SysPrintf("branch %x in delay slot!\n", cpuRegs.code); - _clearNeededX86regs(); - _clearNeededMMXregs(); - _clearNeededXMMregs(); - return; - } - } - recBSC[ cpuRegs.code >> 26 ](); - } - - if( !delayslot ) { - if( s_bFlushReg ) { - //if( !_flushUnusedConstReg() ) { - int flushed = 0; - if( _getNumMMXwrite() > 3 ) flushed = _flushMMXunused(); - if( !flushed && _getNumXMMwrite() > 2 ) _flushXMMunused(); - s_bFlushReg = !flushed; -// } -// else s_bFlushReg = 0; - } - else s_bFlushReg = 1; - } - else s_bFlushReg = 1; - - //CHECK_XMMCHANGED(); - _clearNeededX86regs(); - _clearNeededMMXregs(); - _clearNeededXMMregs(); - -// _freeXMMregs(); -// _freeMMXregs(); -// _flushCachedRegs(); -// g_cpuHasConstReg = 1; -} - -//__declspec(naked) void iDummyBlock() -//{ -//// g_lastpc = cpuRegs.pc; -//// -//// do { -//// cpuRegs.cycle = g_nextBranchCycle; -//// cpuBranchTest(); -//// } while(g_lastpc == cpuRegs.pc); -//// -//// __asm jmp DispatcherReg -// __asm { -//RepDummy: -// add cpuRegs.cycle, 9 -// call cpuBranchTest -// cmp cpuRegs.pc, 0x81fc0 -// je RepDummy -// jmp DispatcherReg -// } -//} - -//////////////////////////////////////////////////// -#include "R3000A.h" -#include "PsxCounters.h" -#include "PsxMem.h" -extern tIPU_BP g_BP; - -extern u32 psxdump; -extern u32 psxNextCounter, psxNextsCounter; -extern void 
iDumpPsxRegisters(u32 startpc, u32 temp); -extern Counter counters[6]; +/* Pcsx2 - Pc Ps2 Emulator + * Copyright (C) 2002-2005 Pcsx2 Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +// recompiler reworked to add dynamic linking zerofrog(@gmail.com) Jan06 +// Recompiled completely rewritten to add block level recompilation/reg-caching/ +// liveness analysis/constant propagation Apr06 (zerofrog@gmail.com) + +// stop compiling if NORECBUILD build (only for Visual Studio) +#if !(defined(_MSC_VER) && defined(PCSX2_NORECBUILD)) + +#include +#include +#include +#include + +#include "Common.h" +#include "Memory.h" +#include "InterTables.h" +#include "ix86/ix86.h" +#include "iR5900.h" +#include "iR5900AritImm.h" +#include "iR5900Arit.h" +#include "iR5900MultDiv.h" +#include "iR5900Shift.h" +#include "iR5900Branch.h" +#include "iR5900Jump.h" +#include "iR5900LoadStore.h" +#include "iR5900Move.h" +#include "iMMI.h" +#include "iFPU.h" +#include "iCP0.h" +#include "iVUmicro.h" +#include "iVU0micro.h" +#include "iVU1micro.h" +#include "VU.h" +#include "VUmicro.h" + +#include "iVUzerorec.h" + +#ifdef _WIN32 +#pragma warning(disable:4244) +#pragma warning(disable:4761) +#endif + +u32 maxrecmem = 0; +uptr *recLUT; + +#define X86 +#define RECSTACK_SIZE 0x00010000 + +#define EE_NUMBLOCKS (1<<15) + +static 
char *recMem = NULL; // the recompiled blocks will be here +static char* recStack = NULL; // stack mem +static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here +static BASEBLOCK *recROM = NULL; // and here +static BASEBLOCK *recROM1 = NULL; // also here +static BASEBLOCKEX *recBlocks = NULL; +static char *recPtr = NULL, *recStackPtr = NULL; +static EEINST* s_pInstCache = NULL; +static u32 s_nInstCacheSize = 0; + +u32 g_EEFreezeRegs = 0; // if set, should freeze the regs + +static BASEBLOCK* s_pCurBlock = NULL; +static BASEBLOCKEX* s_pCurBlockEx = NULL; +static BASEBLOCK* s_pDispatchBlock = NULL; +static u32 s_nEndBlock = 0; // what pc the current block ends +static u32 s_nHasDelay = 0; + +static u32 s_nNextBlock = 0; // next free block in recBlocks + +extern void (*recBSC[64])(); +extern void (*recBSC_co[64])(); +void rpropBSC(EEINST* prev, EEINST* pinst); + +// save states for branches +static u16 s_savex86FpuState, s_saveiCWstate; +static GPR_reg64 s_ConstGPRreg; +static u32 s_saveConstGPRreg = 0, s_saveHasConstReg = 0, s_saveFlushedConstReg = 0, s_saveRegHasLive1 = 0, s_saveRegHasSignExt = 0; +static EEINST* s_psaveInstInfo = NULL; + +u32 s_nBlockCycles = 0; // cycles of current block recompiling +static u32 s_savenBlockCycles = 0; + +void recCOP2RecompileInst(); +int recCOP2AnalyzeBlock(u32 startpc, u32 endpc); +void recCOP2EndBlock(void); + +#ifdef _DEBUG +u32 dumplog = 0; +#else +#define dumplog 0 +#endif + +u32 pc; // recompiler pc +int branch; // set for branch + +//#ifdef PCSX2_DEVBUILD +LARGE_INTEGER lbase = {0}, lfinal = {0}; +static u32 s_startcount = 0; +//#endif + +char *txt0 = "EAX = %x : ECX = %x : EDX = %x\n"; +char *txt0RC = "EAX = %x : EBX = %x : ECX = %x : EDX = %x : ESI = %x : EDI = %x\n"; +char *txt1 = "REG[%d] = %x_%x\n"; +char *txt2 = "M32 = %x\n"; + +void _cop2AnalyzeOp(EEINST* pinst, int dostalls); // reccop2.c +static void iBranchTest(u32 newpc, u32 cpuBranch); +void recRecompile( u32 startpc ); +void recCOP22( void ); + 
+BASEBLOCKEX* PC_GETBLOCKEX(BASEBLOCK* p) +{ +// BASEBLOCKEX* pex = *(BASEBLOCKEX**)(p+1); +// if( pex >= recBlocks && pex < recBlocks+EE_NUMBLOCKS ) +// return pex; + + // otherwise, use the sorted list + return GetBaseBlockEx(p->startpc, 0); +} + +//////////////////////////////////////////////////// +void iDumpBlock( int startpc, char * ptr ) +{ + FILE *f; + char filename[ 256 ]; + u32 i, j; + EEINST* pcur; + extern char *disRNameGPR[]; + u8 used[34]; + u8 fpuused[33]; + int numused, count, fpunumused; + + SysPrintf( "dump1 %x:%x, %x\n", startpc, pc, cpuRegs.cycle ); +#ifdef _WIN32 + CreateDirectory("dumps", NULL); + sprintf( filename, "dumps\\dump%.8X.txt", startpc); +#else + mkdir("dumps", 0755); + sprintf( filename, "dumps/dump%.8X.txt", startpc); +#endif + + fflush( stdout ); +// f = fopen( "dump1", "wb" ); +// fwrite( ptr, 1, (u32)x86Ptr - (u32)ptr, f ); +// fclose( f ); +// +// sprintf( command, "objdump -D --target=binary --architecture=i386 dump1 > %s", filename ); +// system( command ); + + f = fopen( filename, "w" ); + + if( disR5900GetSym(startpc) != NULL ) + fprintf(f, "%s\n", disR5900GetSym(startpc)); + for ( i = startpc; i < s_nEndBlock; i += 4 ) { + fprintf( f, "%s\n", disR5900Fasm( PSMu32( i ), i ) ); + } + + // write the instruction info + + fprintf(f, "\n\nlive0 - %x, live1 - %x, live2 - %x, lastuse - %x\nmmx - %x, xmm - %x, used - %x\n", + EEINST_LIVE0, EEINST_LIVE1, EEINST_LIVE2, EEINST_LASTUSE, EEINST_MMX, EEINST_XMM, EEINST_USED); + + memset(used, 0, sizeof(used)); + numused = 0; + for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { + if( s_pInstCache->regs[i] & EEINST_USED ) { + used[i] = 1; + numused++; + } + } + + memset(fpuused, 0, sizeof(fpuused)); + fpunumused = 0; + for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) { + if( s_pInstCache->fpuregs[i] & EEINST_USED ) { + fpuused[i] = 1; + fpunumused++; + } + } + + fprintf(f, " "); + for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { + if( used[i] ) fprintf(f, "%2d ", i); + } + 
for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) { + if( fpuused[i] ) fprintf(f, "%2d ", i); + } + fprintf(f, "\n"); + + fprintf(f, " "); + for(i = 0; i < ARRAYSIZE(s_pInstCache->regs); ++i) { + if( used[i] ) fprintf(f, "%s ", disRNameGPR[i]); + } + for(i = 0; i < ARRAYSIZE(s_pInstCache->fpuregs); ++i) { + if( fpuused[i] ) fprintf(f, "%s ", i<32?"FR":"FA"); + } + fprintf(f, "\n"); + + pcur = s_pInstCache+1; + for( i = 0; i < (s_nEndBlock-startpc)/4; ++i, ++pcur) { + fprintf(f, "%2d: %2.2x ", i+1, pcur->info); + + count = 1; + for(j = 0; j < ARRAYSIZE(s_pInstCache->regs); j++) { + if( used[j] ) { + fprintf(f, "%2.2x%s", pcur->regs[j], ((count%8)&&countfpuregs); j++) { + if( fpuused[j] ) { + fprintf(f, "%2.2x%s", pcur->fpuregs[j], ((count%8)&&count>26) { + case 26: // ldl + case 27: // ldr + case 32: case 33: case 34: case 35: case 36: case 37: case 38: case 39: + case 55: // LD + case 30: // lq + return ((tempcode>>21)&0x1f)==((tempcode>>16)&0x1f); // rs==rt + } + return 0; +} + +u8 _eeIsLoadStoreCoIssue(u32 firstcode, u32 secondcode) +{ + switch(firstcode>>26) { + case 34: // lwl + return (secondcode>>26)==38; + case 38: // lwr + return (secondcode>>26)==34; + case 42: // swl + return (secondcode>>26)==46; + case 46: // swr + return (secondcode>>26)==42; + case 26: // ldl + return (secondcode>>26)==27; + case 27: // ldr + return (secondcode>>26)==26; + case 44: // sdl + return (secondcode>>26)==45; + case 45: // sdr + return (secondcode>>26)==44; + + case 32: case 33: case 35: case 36: case 37: case 39: + case 55: // LD + + // stores + case 40: case 41: case 43: + case 63: // sd + return (secondcode>>26)==(firstcode>>26); + + case 30: // lq + case 31: // sq + case 49: // lwc1 + case 57: // swc1 + case 54: // lqc2 + case 62: // sqc2 + return (secondcode>>26)==(firstcode>>26)&&cpucaps.hasStreamingSIMDExtensions; + } + return 0; +} + +u8 _eeIsLoadStoreCoX(u32 tempcode) +{ + switch( tempcode>>26 ) { + case 30: case 31: case 49: case 57: case 55: case 63: + return 
1; + } + return 0; +} + +void _eeFlushAllUnused() +{ + int i; + for(i = 0; i < 34; ++i) { + if( pc < s_nEndBlock ) { + if( (g_pCurInstInfo[1].regs[i]&EEINST_USED) ) + continue; + } + else if( (g_pCurInstInfo[0].regs[i]&EEINST_USED) ) + continue; + + if( i < 32 && GPR_IS_CONST1(i) ) _flushConstReg(i); + else { + _deleteMMXreg(MMX_GPR+i, 1); + _deleteGPRtoXMMreg(i, 1); + } + } + + //TODO when used info is done for FPU and VU0 + for(i = 0; i < XMMREGS; ++i) { + if( xmmregs[i].inuse && xmmregs[i].type != XMMTYPE_GPRREG ) + _freeXMMreg(i); + } +} + +u32* _eeGetConstReg(int reg) +{ + assert( GPR_IS_CONST1( reg ) ); + + if( g_cpuFlushedConstReg & (1<regs[xmmregs[i].reg]&EEINST_USED) ) { + if( !_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, xmmregs[i].reg) ) { + _freeXMMreg(i); + xmmregs[i].inuse = 1; + return 1; + } + } + } + + return 0; +} + +int _flushMMXunused() +{ + int i; + for (i=0; iregs[mmxregs[i].reg-MMX_GPR]&EEINST_USED) ) { + if( !_recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR) ) { + _freeMMXreg(i); + mmxregs[i].inuse = 1; + return 1; + } + } + } + + return 0; +} + +int _flushUnusedConstReg() +{ + int i; + for(i = 1; i < 32; ++i) { + if( (g_cpuHasConstReg & (1<regs[reg]&EEINST_LASTUSE) ) { + if( usemmx ) return _allocMMXreg(-1, MMX_GPR+reg, mode); + return _allocGPRtoXMMreg(-1, reg, mode); + } + + return -1; +} + +#define PROCESS_EE_SETMODES(mmreg) ((mmxregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITES:0) +#define PROCESS_EE_SETMODET(mmreg) ((mmxregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0) + +// ignores XMMINFO_READS, XMMINFO_READT, and XMMINFO_READD_LO from xmminfo +// core of reg caching +void eeRecompileCode0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode, int xmminfo) +{ + int mmreg1, mmreg2, mmreg3, mmtemp, moded; + + if ( ! 
_Rd_ && (xmminfo&XMMINFO_WRITED) ) return; + + if( xmminfo&XMMINFO_WRITED) { + CHECK_SAVE_REG(_Rd_); + _eeProcessHasLive(_Rd_, 0); + EEINST_RESETSIGNEXT(_Rd_); + } + + if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if( xmminfo & XMMINFO_WRITED ) { + _deleteMMXreg(MMX_GPR+_Rd_, 2); + _deleteGPRtoXMMreg(_Rd_, 2); + } + if( xmminfo&XMMINFO_WRITED ) GPR_SET_CONST(_Rd_); + constcode(); + return; + } + + moded = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); + + // test if should write mmx + if( g_pCurInstInfo->info & EEINST_MMX ) { + + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededMMXreg(MMX_GPR+MMX_LO); + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededMMXreg(MMX_GPR+MMX_HI); + _addNeededMMXreg(MMX_GPR+_Rs_); + _addNeededMMXreg(MMX_GPR+_Rt_); + + if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) { + int creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_; + int vreg = creg == _Rs_ ? _Rt_ : _Rs_; + +// if(g_pCurInstInfo->regs[vreg]&EEINST_MMX) { +// mmreg1 = _allocMMXreg(-1, MMX_GPR+vreg, MODE_READ); +// _addNeededMMXreg(MMX_GPR+vreg); +// } + mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, vreg, MODE_READ); + + if( mmreg1 >= 0 ) { + int info = PROCESS_EE_MMX; + + if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1); + else info |= PROCESS_EE_SETMODES(mmreg1); + + if( xmminfo & XMMINFO_WRITED ) { + _addNeededMMXreg(MMX_GPR+_Rd_); + mmreg3 = _checkMMXreg(MMX_GPR+_Rd_, moded); + + if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVE64(vreg)) ) { + if( EEINST_ISLIVE64(vreg) ) { + _freeMMXreg(mmreg1); + if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET; + else info &= ~PROCESS_EE_MODEWRITES; + } + _deleteGPRtoXMMreg(_Rd_, 2); + mmxregs[mmreg1].inuse = 1; + mmxregs[mmreg1].reg = _Rd_; + mmxregs[mmreg1].mode = moded; + mmreg3 = mmreg1; + } + else if( mmreg3 < 0 ) mmreg3 = _allocMMXreg(-1, MMX_GPR+_Rd_, moded); + + info |= PROCESS_EE_SET_D(mmreg3); + } + + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) 
) { + mmtemp = eeProcessHILO(MMX_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); + } + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + mmtemp = eeProcessHILO(MMX_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); + } + + SetMMXstate(); + if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1)); + else consttcode(info|PROCESS_EE_SET_S(mmreg1)); + _clearNeededMMXregs(); + if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + return; + } + } + else { + // no const regs + mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rs_, MODE_READ); + mmreg2 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_READ); + + if( mmreg1 >= 0 || mmreg2 >= 0 ) { + int info = PROCESS_EE_MMX; + + // do it all in mmx + if( mmreg1 < 0 ) mmreg1 = _allocMMXreg(-1, MMX_GPR+_Rs_, MODE_READ); + if( mmreg2 < 0 ) mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_READ); + + info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2); + + // check for last used, if so don't alloc a new MMX reg + if( xmminfo & XMMINFO_WRITED ) { + _addNeededMMXreg(MMX_GPR+_Rd_); + mmreg3 = _checkMMXreg(MMX_GPR+_Rd_, moded); + + if( mmreg3 < 0 ) { + if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_)) ) { + if( EEINST_ISLIVE64(_Rt_) ) { + _freeMMXreg(mmreg2); + info &= ~PROCESS_EE_MODEWRITET; + } + _deleteGPRtoXMMreg(_Rd_, 2); + mmxregs[mmreg2].inuse = 1; + mmxregs[mmreg2].reg = _Rd_; + mmxregs[mmreg2].mode = moded; + mmreg3 = mmreg2; + } + else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rs_)) ) { + if( EEINST_ISLIVE64(_Rs_) ) { + _freeMMXreg(mmreg1); + info &= ~PROCESS_EE_MODEWRITES; + } + _deleteGPRtoXMMreg(_Rd_, 2); + mmxregs[mmreg1].inuse = 1; + mmxregs[mmreg1].reg = _Rd_; + mmxregs[mmreg1].mode = moded; + mmreg3 = 
mmreg1; + } + else mmreg3 = _allocMMXreg(-1, MMX_GPR+_Rd_, moded); + } + + info |= PROCESS_EE_SET_D(mmreg3); + } + + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { + mmtemp = eeProcessHILO(MMX_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); + } + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + mmtemp = eeProcessHILO(MMX_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 1); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); + } + + SetMMXstate(); + noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); + _clearNeededMMXregs(); + if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + return; + } + } + + _clearNeededMMXregs(); + } + + // test if should write xmm, mirror to mmx code + if( g_pCurInstInfo->info & EEINST_XMM ) { + + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) _addNeededGPRtoXMMreg(XMMGPR_LO); + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) _addNeededGPRtoXMMreg(XMMGPR_HI); + _addNeededGPRtoXMMreg(_Rs_); + _addNeededGPRtoXMMreg(_Rt_); + + if( GPR_IS_CONST1(_Rs_) || GPR_IS_CONST1(_Rt_) ) { + int creg = GPR_IS_CONST1(_Rs_) ? _Rs_ : _Rt_; + int vreg = creg == _Rs_ ? 
_Rt_ : _Rs_; + +// if(g_pCurInstInfo->regs[vreg]&EEINST_XMM) { +// mmreg1 = _allocGPRtoXMMreg(-1, vreg, MODE_READ); +// _addNeededGPRtoXMMreg(vreg); +// } + mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, vreg, MODE_READ); + + if( mmreg1 >= 0 ) { + int info = PROCESS_EE_XMM; + + if( GPR_IS_CONST1(_Rs_) ) info |= PROCESS_EE_SETMODET(mmreg1); + else info |= PROCESS_EE_SETMODES(mmreg1); + + if( xmminfo & XMMINFO_WRITED ) { + + _addNeededGPRtoXMMreg(_Rd_); + mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); + + if( !(xmminfo&XMMINFO_READD) && mmreg3 < 0 && ((g_pCurInstInfo->regs[vreg] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(vreg)) ) { + _freeXMMreg(mmreg1); + if( GPR_IS_CONST1(_Rs_) ) info &= ~PROCESS_EE_MODEWRITET; + else info &= ~PROCESS_EE_MODEWRITES; + _deleteMMXreg(MMX_GPR+_Rd_, 2); + xmmregs[mmreg1].inuse = 1; + xmmregs[mmreg1].reg = _Rd_; + xmmregs[mmreg1].mode = moded; + mmreg3 = mmreg1; + } + else if( mmreg3 < 0 ) mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); + + info |= PROCESS_EE_SET_D(mmreg3); + } + + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { + mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); + } + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); + } + + if( creg == _Rs_ ) constscode(info|PROCESS_EE_SET_T(mmreg1)); + else consttcode(info|PROCESS_EE_SET_S(mmreg1)); + _clearNeededXMMregs(); + if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + return; + } + } + else { + // no const regs + mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); + mmreg2 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); + + if( mmreg1 >= 0 || mmreg2 >= 0 ) { + int info = PROCESS_EE_XMM; + + // do it all in xmm + if( mmreg1 < 0 ) mmreg1 = 
_allocGPRtoXMMreg(-1, _Rs_, MODE_READ); + if( mmreg2 < 0 ) mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); + + info |= PROCESS_EE_SETMODES(mmreg1)|PROCESS_EE_SETMODET(mmreg2); + + if( xmminfo & XMMINFO_WRITED ) { + // check for last used, if so don't alloc a new XMM reg + _addNeededGPRtoXMMreg(_Rd_); + mmreg3 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, moded); + + if( mmreg3 < 0 ) { + if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { + _freeXMMreg(mmreg2); + info &= ~PROCESS_EE_MODEWRITET; + _deleteMMXreg(MMX_GPR+_Rd_, 2); + xmmregs[mmreg2].inuse = 1; + xmmregs[mmreg2].reg = _Rd_; + xmmregs[mmreg2].mode = moded; + mmreg3 = mmreg2; + } + else if( !(xmminfo&XMMINFO_READD) && ((g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { + _freeXMMreg(mmreg1); + info &= ~PROCESS_EE_MODEWRITES; + _deleteMMXreg(MMX_GPR+_Rd_, 2); + xmmregs[mmreg1].inuse = 1; + xmmregs[mmreg1].reg = _Rd_; + xmmregs[mmreg1].mode = moded; + mmreg3 = mmreg1; + } + else mmreg3 = _allocGPRtoXMMreg(-1, _Rd_, moded); + } + + info |= PROCESS_EE_SET_D(mmreg3); + } + + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { + mmtemp = eeProcessHILO(XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_LO(mmtemp); + } + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + mmtemp = eeProcessHILO(XMMGPR_HI, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0), 0); + if( mmtemp >= 0 ) info |= PROCESS_EE_SET_HI(mmtemp); + } + + noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); + _clearNeededXMMregs(); + if( xmminfo & XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + return; + } + } + + _clearNeededXMMregs(); + } + + // regular x86 + _deleteGPRtoXMMreg(_Rs_, 1); + _deleteGPRtoXMMreg(_Rt_, 1); + if( xmminfo&XMMINFO_WRITED ) + _deleteGPRtoXMMreg(_Rd_, (xmminfo&XMMINFO_READD)?0:2); + _deleteMMXreg(MMX_GPR+_Rs_, 1); + 
_deleteMMXreg(MMX_GPR+_Rt_, 1); + if( xmminfo&XMMINFO_WRITED ) + _deleteMMXreg(MMX_GPR+_Rd_, (xmminfo&XMMINFO_READD)?0:2); + + // don't delete, fn will take care of them +// if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { +// _deleteGPRtoXMMreg(XMMGPR_LO, (xmminfo&XMMINFO_READLO)?1:0); +// _deleteMMXreg(MMX_GPR+MMX_LO, (xmminfo&XMMINFO_READLO)?1:0); +// } +// if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { +// _deleteGPRtoXMMreg(XMMGPR_HI, (xmminfo&XMMINFO_READHI)?1:0); +// _deleteMMXreg(MMX_GPR+MMX_HI, (xmminfo&XMMINFO_READHI)?1:0); +// } + + if( GPR_IS_CONST1(_Rs_) ) { + constscode(0); + if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + return; + } + + if( GPR_IS_CONST1(_Rt_) ) { + consttcode(0); + if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); + return; + } + + noconstcode(0); + if( xmminfo&XMMINFO_WRITED ) GPR_DEL_CONST(_Rd_); +} + +// rt = rs op imm16 +void eeRecompileCode1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) +{ + int mmreg1, mmreg2; + if ( ! _Rt_ ) return; + + CHECK_SAVE_REG(_Rt_); + _eeProcessHasLive(_Rt_, 0); + EEINST_RESETSIGNEXT(_Rt_); + + if( GPR_IS_CONST1(_Rs_) ) { + _deleteMMXreg(MMX_GPR+_Rt_, 2); + _deleteGPRtoXMMreg(_Rt_, 2); + GPR_SET_CONST(_Rt_); + constcode(); + return; + } + + // test if should write mmx + if( g_pCurInstInfo->info & EEINST_MMX ) { + + // no const regs + mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rs_, MODE_READ); + + if( mmreg1 >= 0 ) { + int info = PROCESS_EE_MMX|PROCESS_EE_SETMODES(mmreg1); + + // check for last used, if so don't alloc a new MMX reg + _addNeededMMXreg(MMX_GPR+_Rt_); + mmreg2 = _checkMMXreg(MMX_GPR+_Rt_, MODE_WRITE); + + if( mmreg2 < 0 ) { + if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rs_) ) { + if( EEINST_ISLIVE64(_Rs_) ) { + _freeMMXreg(mmreg1); + info &= ~PROCESS_EE_MODEWRITES; + } + _deleteGPRtoXMMreg(_Rt_, 2); + mmxregs[mmreg1].inuse = 1; + mmxregs[mmreg1].reg = _Rt_; + mmxregs[mmreg1].mode = MODE_WRITE|MODE_READ; + mmreg2 = mmreg1; + } + else mmreg2 = 
_allocMMXreg(-1, MMX_GPR+_Rt_, MODE_WRITE); + } + + SetMMXstate(); + noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); + _clearNeededMMXregs(); + GPR_DEL_CONST(_Rt_); + return; + } + + _clearNeededMMXregs(); + } + + // test if should write xmm, mirror to mmx code + if( g_pCurInstInfo->info & EEINST_XMM ) { + + // no const regs + mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rs_, MODE_READ); + + if( mmreg1 >= 0 ) { + int info = PROCESS_EE_XMM|PROCESS_EE_SETMODES(mmreg1); + + // check for last used, if so don't alloc a new XMM reg + _addNeededGPRtoXMMreg(_Rt_); + mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rt_, MODE_WRITE); + + if( mmreg2 < 0 ) { + if( (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_) ) { + _freeXMMreg(mmreg1); + info &= ~PROCESS_EE_MODEWRITES; + _deleteMMXreg(MMX_GPR+_Rt_, 2); + xmmregs[mmreg1].inuse = 1; + xmmregs[mmreg1].reg = _Rt_; + xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ; + mmreg2 = mmreg1; + } + else mmreg2 = _allocGPRtoXMMreg(-1, _Rt_, MODE_WRITE); + } + + noconstcode(info|PROCESS_EE_SET_S(mmreg1)|PROCESS_EE_SET_T(mmreg2)); + _clearNeededXMMregs(); + GPR_DEL_CONST(_Rt_); + return; + } + + _clearNeededXMMregs(); + } + + // regular x86 + _deleteGPRtoXMMreg(_Rs_, 1); + _deleteGPRtoXMMreg(_Rt_, 2); + _deleteMMXreg(MMX_GPR+_Rs_, 1); + _deleteMMXreg(MMX_GPR+_Rt_, 2); + + noconstcode(0); + GPR_DEL_CONST(_Rt_); +} + +// rd = rt op sa +void eeRecompileCode2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) +{ + int mmreg1, mmreg2; + if ( ! 
_Rd_ ) return; + + CHECK_SAVE_REG(_Rd_); + _eeProcessHasLive(_Rd_, 0); + EEINST_RESETSIGNEXT(_Rd_); + + if( GPR_IS_CONST1(_Rt_) ) { + _deleteMMXreg(MMX_GPR+_Rd_, 2); + _deleteGPRtoXMMreg(_Rd_, 2); + GPR_SET_CONST(_Rd_); + constcode(); + return; + } + + // test if should write mmx + if( g_pCurInstInfo->info & EEINST_MMX ) { + + // no const regs + mmreg1 = _allocCheckGPRtoMMX(g_pCurInstInfo, _Rt_, MODE_READ); + + if( mmreg1 >= 0 ) { + int info = PROCESS_EE_MMX|PROCESS_EE_SETMODET(mmreg1); + + // check for last used, if so don't alloc a new MMX reg + _addNeededMMXreg(MMX_GPR+_Rd_); + mmreg2 = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE); + + if( mmreg2 < 0 ) { + if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_) ) { + if( EEINST_ISLIVE64(_Rt_) ) { + _freeMMXreg(mmreg1); + info &= ~PROCESS_EE_MODEWRITET; + } + _deleteGPRtoXMMreg(_Rd_, 2); + mmxregs[mmreg1].inuse = 1; + mmxregs[mmreg1].reg = _Rd_; + mmxregs[mmreg1].mode = MODE_WRITE|MODE_READ; + mmreg2 = mmreg1; + } + else mmreg2 = _allocMMXreg(-1, MMX_GPR+_Rd_, MODE_WRITE); + } + + SetMMXstate(); + noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2)); + _clearNeededMMXregs(); + GPR_DEL_CONST(_Rd_); + return; + } + + _clearNeededMMXregs(); + } + + // test if should write xmm, mirror to mmx code + if( g_pCurInstInfo->info & EEINST_XMM ) { + + // no const regs + mmreg1 = _allocCheckGPRtoXMM(g_pCurInstInfo, _Rt_, MODE_READ); + + if( mmreg1 >= 0 ) { + int info = PROCESS_EE_XMM|PROCESS_EE_SETMODET(mmreg1); + + // check for last used, if so don't alloc a new XMM reg + _addNeededGPRtoXMMreg(_Rd_); + mmreg2 = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); + + if( mmreg2 < 0 ) { + if( (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVE64(_Rt_) ) { + _freeXMMreg(mmreg1); + info &= ~PROCESS_EE_MODEWRITET; + _deleteMMXreg(MMX_GPR+_Rd_, 2); + xmmregs[mmreg1].inuse = 1; + xmmregs[mmreg1].reg = _Rd_; + xmmregs[mmreg1].mode = MODE_WRITE|MODE_READ; + mmreg2 = mmreg1; + } + else mmreg2 = 
_allocGPRtoXMMreg(-1, _Rd_, MODE_WRITE); + } + + noconstcode(info|PROCESS_EE_SET_T(mmreg1)|PROCESS_EE_SET_D(mmreg2)); + _clearNeededXMMregs(); + GPR_DEL_CONST(_Rd_); + return; + } + + _clearNeededXMMregs(); + } + + // regular x86 + _deleteGPRtoXMMreg(_Rt_, 1); + _deleteGPRtoXMMreg(_Rd_, 2); + _deleteMMXreg(MMX_GPR+_Rt_, 1); + _deleteMMXreg(MMX_GPR+_Rd_, 2); + + noconstcode(0); + GPR_DEL_CONST(_Rd_); +} + +// rt op rs +void eeRecompileCode3(R5900FNPTR constcode, R5900FNPTR_INFO multicode) +{ + assert(0); + // for now, don't support xmm + _deleteEEreg(_Rs_, 1); + _deleteEEreg(_Rt_, 1); + + if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + constcode(); + return; + } + + if( GPR_IS_CONST1(_Rs_) ) { + //multicode(PROCESS_EE_CONSTT); + return; + } + + if( GPR_IS_CONST1(_Rt_) ) { + //multicode(PROCESS_EE_CONSTT); + return; + } + + multicode(0); +} + +// Simple Code Templates // + +// rd = rs op rt +void eeRecompileCodeConst0(R5900FNPTR constcode, R5900FNPTR_INFO constscode, R5900FNPTR_INFO consttcode, R5900FNPTR_INFO noconstcode) +{ + if ( ! _Rd_ ) return; + + // for now, don't support xmm + CHECK_SAVE_REG(_Rd_); + + _deleteGPRtoXMMreg(_Rs_, 1); + _deleteGPRtoXMMreg(_Rt_, 1); + _deleteGPRtoXMMreg(_Rd_, 0); + _deleteMMXreg(MMX_GPR+_Rs_, 1); + _deleteMMXreg(MMX_GPR+_Rt_, 1); + _deleteMMXreg(MMX_GPR+_Rd_, 0); + + if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + GPR_SET_CONST(_Rd_); + constcode(); + return; + } + + if( GPR_IS_CONST1(_Rs_) ) { + constscode(0); + GPR_DEL_CONST(_Rd_); + return; + } + + if( GPR_IS_CONST1(_Rt_) ) { + consttcode(0); + GPR_DEL_CONST(_Rd_); + return; + } + + noconstcode(0); + GPR_DEL_CONST(_Rd_); +} + +// rt = rs op imm16 +void eeRecompileCodeConst1(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) +{ + if ( ! 
_Rt_ ) + return; + + // for now, don't support xmm + CHECK_SAVE_REG(_Rt_); + + _deleteGPRtoXMMreg(_Rs_, 1); + _deleteGPRtoXMMreg(_Rt_, 0); + + if( GPR_IS_CONST1(_Rs_) ) { + GPR_SET_CONST(_Rt_); + constcode(); + return; + } + + noconstcode(0); + GPR_DEL_CONST(_Rt_); +} + +// rd = rt op sa +void eeRecompileCodeConst2(R5900FNPTR constcode, R5900FNPTR_INFO noconstcode) +{ + if ( ! _Rd_ ) return; + + // for now, don't support xmm + CHECK_SAVE_REG(_Rd_); + + _deleteGPRtoXMMreg(_Rt_, 1); + _deleteGPRtoXMMreg(_Rd_, 0); + + if( GPR_IS_CONST1(_Rt_) ) { + GPR_SET_CONST(_Rd_); + constcode(); + return; + } + + noconstcode(0); + GPR_DEL_CONST(_Rd_); +} + +// rd = rt MULT rs (SPECIAL) +void eeRecompileCodeConstSPECIAL(R5900FNPTR constcode, R5900FNPTR_INFO multicode, int MULT) +{ + assert(0); + // for now, don't support xmm + if( MULT ) { + CHECK_SAVE_REG(_Rd_); + _deleteGPRtoXMMreg(_Rd_, 0); + } + + _deleteGPRtoXMMreg(_Rs_, 1); + _deleteGPRtoXMMreg(_Rt_, 1); + + if( GPR_IS_CONST2(_Rs_, _Rt_) ) { + if( MULT && _Rd_ ) GPR_SET_CONST(_Rd_); + constcode(); + return; + } + + if( GPR_IS_CONST1(_Rs_) ) { + //multicode(PROCESS_EE_CONSTS); + if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); + return; + } + + if( GPR_IS_CONST1(_Rt_) ) { + //multicode(PROCESS_EE_CONSTT); + if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); + return; + } + + multicode(0); + if( MULT && _Rd_ ) GPR_DEL_CONST(_Rd_); +} + +// EE XMM allocation code +int eeRecompileCodeXMM(int xmminfo) +{ + int info = PROCESS_EE_XMM; + + // save state + if( xmminfo & XMMINFO_WRITED ) { + CHECK_SAVE_REG(_Rd_); + _eeProcessHasLive(_Rd_, 0); + EEINST_RESETSIGNEXT(_Rd_); + } + + // flush consts + if( xmminfo & XMMINFO_READT ) { + if( GPR_IS_CONST1( _Rt_ ) && !(g_cpuFlushedConstReg&(1<<_Rt_)) ) { + MOV32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 0 ], g_cpuConstRegs[_Rt_].UL[0]); + MOV32ItoM((int)&cpuRegs.GPR.r[ _Rt_ ].UL[ 1 ], g_cpuConstRegs[_Rt_].UL[1]); + g_cpuFlushedConstReg |= (1<<_Rt_); + } + } + if( xmminfo & XMMINFO_READS) { + if( GPR_IS_CONST1( _Rs_ ) && 
!(g_cpuFlushedConstReg&(1<<_Rs_)) ) { + MOV32ItoM((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ], g_cpuConstRegs[_Rs_].UL[0]); + MOV32ItoM((int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 1 ], g_cpuConstRegs[_Rs_].UL[1]); + g_cpuFlushedConstReg |= (1<<_Rs_); + } + } + + if( xmminfo & XMMINFO_WRITED ) { + GPR_DEL_CONST(_Rd_); + } + + // add needed + if( xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { + _addNeededGPRtoXMMreg(XMMGPR_LO); + } + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + _addNeededGPRtoXMMreg(XMMGPR_HI); + } + if( xmminfo & XMMINFO_READS) _addNeededGPRtoXMMreg(_Rs_); + if( xmminfo & XMMINFO_READT) _addNeededGPRtoXMMreg(_Rt_); + if( xmminfo & XMMINFO_WRITED ) _addNeededGPRtoXMMreg(_Rd_); + + // allocate + if( xmminfo & XMMINFO_READS) { + int reg = _allocGPRtoXMMreg(-1, _Rs_, MODE_READ); + info |= PROCESS_EE_SET_S(reg)|PROCESS_EE_SETMODES(reg); + } + if( xmminfo & XMMINFO_READT) { + int reg = _allocGPRtoXMMreg(-1, _Rt_, MODE_READ); + info |= PROCESS_EE_SET_T(reg)|PROCESS_EE_SETMODET(reg); + } + + if( xmminfo & XMMINFO_WRITED ) { + int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?((xmminfo&XMMINFO_READD_LO)?(MODE_READ|MODE_READHALF):MODE_READ):0); + + int regd = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, readd); + + if( regd < 0 ) { + if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READT) && (_Rt_ == 0 || (g_pCurInstInfo->regs[_Rt_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rt_)) ) { + _freeXMMreg(EEREC_T); + _deleteMMXreg(MMX_GPR+_Rd_, 2); + xmmregs[EEREC_T].inuse = 1; + xmmregs[EEREC_T].reg = _Rd_; + xmmregs[EEREC_T].mode = readd; + regd = EEREC_T; + } + else if( !(xmminfo&XMMINFO_READD) && (xmminfo & XMMINFO_READS) && (_Rs_ == 0 || (g_pCurInstInfo->regs[_Rs_] & EEINST_LASTUSE) || !EEINST_ISLIVEXMM(_Rs_)) ) { + _freeXMMreg(EEREC_S); + _deleteMMXreg(MMX_GPR+_Rd_, 2); + xmmregs[EEREC_S].inuse = 1; + xmmregs[EEREC_S].reg = _Rd_; + xmmregs[EEREC_S].mode = readd; + regd = EEREC_S; + } + else regd = _allocGPRtoXMMreg(-1, _Rd_, readd); + } + + info |= PROCESS_EE_SET_D(regd); + } + if( 
xmminfo & (XMMINFO_READLO|XMMINFO_WRITELO) ) { + info |= PROCESS_EE_SET_LO(_allocGPRtoXMMreg(-1, XMMGPR_LO, ((xmminfo&XMMINFO_READLO)?MODE_READ:0)|((xmminfo&XMMINFO_WRITELO)?MODE_WRITE:0))); + info |= PROCESS_EE_LO; + } + if( xmminfo & (XMMINFO_READHI|XMMINFO_WRITEHI) ) { + info |= PROCESS_EE_SET_HI(_allocGPRtoXMMreg(-1, XMMGPR_HI, ((xmminfo&XMMINFO_READHI)?MODE_READ:0)|((xmminfo&XMMINFO_WRITEHI)?MODE_WRITE:0))); + info |= PROCESS_EE_HI; + } + return info; +} + +// EE COP1(FPU) XMM allocation code +#define _Ft_ _Rt_ +#define _Fs_ _Rd_ +#define _Fd_ _Sa_ + +#define PROCESS_EE_SETMODES_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITES:0) +#define PROCESS_EE_SETMODET_XMM(mmreg) ((xmmregs[mmreg].mode&MODE_WRITE)?PROCESS_EE_MODEWRITET:0) + +// rd = rs op rt +void eeFPURecompileCode(R5900FNPTR_INFO xmmcode, R5900FNPTR_INFO fpucode, int xmminfo) +{ + int mmregs=-1, mmregt=-1, mmregd=-1, mmregacc=-1; + + if( EE_FPU_REGCACHING && cpucaps.hasStreamingSIMDExtensions ) { + int info = PROCESS_EE_XMM; + + if( xmminfo & XMMINFO_READS ) _addNeededFPtoXMMreg(_Fs_); + if( xmminfo & XMMINFO_READT ) _addNeededFPtoXMMreg(_Ft_); + if( xmminfo & (XMMINFO_WRITED|XMMINFO_READD) ) _addNeededFPtoXMMreg(_Fd_); + if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) _addNeededFPACCtoXMMreg(); + + if( xmminfo & XMMINFO_READT ) { + if( g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE ) mmregt = _checkXMMreg(XMMTYPE_FPREG, _Ft_, MODE_READ); + else mmregt = _allocFPtoXMMreg(-1, _Ft_, MODE_READ); + } + + if( xmminfo & XMMINFO_READS ) { + if( (!(xmminfo&XMMINFO_READT)||mmregt>=0) && (g_pCurInstInfo->fpuregs[_Fs_] & EEINST_LASTUSE) ) + mmregs = _checkXMMreg(XMMTYPE_FPREG, _Fs_, MODE_READ); + else mmregs = _allocFPtoXMMreg(-1, _Fs_, MODE_READ); + } + + if( mmregs >= 0 ) info |= PROCESS_EE_SETMODES_XMM(mmregs); + if( mmregt >= 0 ) info |= PROCESS_EE_SETMODET_XMM(mmregt); + + if( xmminfo & XMMINFO_READD ) { + assert( xmminfo & XMMINFO_WRITED ); + mmregd = _allocFPtoXMMreg(-1, _Fd_, 
MODE_READ); + } + + if( xmminfo & XMMINFO_READACC ) { + if( !(xmminfo&XMMINFO_WRITEACC) && (g_pCurInstInfo->fpuregs[_Ft_] & EEINST_LASTUSE) ) + mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, MODE_READ); + else mmregacc = _allocFPACCtoXMMreg(-1, MODE_READ); + } + + if( xmminfo & XMMINFO_WRITEACC ) { + + // check for last used, if so don't alloc a new XMM reg + int readacc = MODE_WRITE|((xmminfo&XMMINFO_READACC)?MODE_READ:0); + + mmregacc = _checkXMMreg(XMMTYPE_FPACC, 0, readacc); + + if( mmregacc < 0 ) { + if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { + if( FPUINST_ISLIVE(_Ft_) ) { + _freeXMMreg(mmregt); + info &= ~PROCESS_EE_MODEWRITET; + } + _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); + xmmregs[mmregt].inuse = 1; + xmmregs[mmregt].reg = 0; + xmmregs[mmregt].mode = readacc; + xmmregs[mmregt].type = XMMTYPE_FPACC; + mmregacc = mmregt; + } + else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { + if( FPUINST_ISLIVE(_Fs_) ) { + _freeXMMreg(mmregs); + info &= ~PROCESS_EE_MODEWRITES; + } + _deleteMMXreg(MMX_FPU+XMMFPU_ACC, 2); + xmmregs[mmregs].inuse = 1; + xmmregs[mmregs].reg = 0; + xmmregs[mmregs].mode = readacc; + xmmregs[mmregs].type = XMMTYPE_FPACC; + mmregacc = mmregs; + } + else mmregacc = _allocFPACCtoXMMreg(-1, readacc); + } + + xmmregs[mmregacc].mode |= MODE_WRITE; + } + else if( xmminfo & XMMINFO_WRITED ) { + // check for last used, if so don't alloc a new XMM reg + int readd = MODE_WRITE|((xmminfo&XMMINFO_READD)?MODE_READ:0); + if( xmminfo&XMMINFO_READD ) mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); + else mmregd = _checkXMMreg(XMMTYPE_FPREG, _Fd_, readd); + + if( mmregd < 0 ) { + if( (xmminfo&XMMINFO_READT) && mmregt >= 0 && (FPUINST_LASTUSE(_Ft_) || !FPUINST_ISLIVE(_Ft_)) ) { + if( FPUINST_ISLIVE(_Ft_) ) { + _freeXMMreg(mmregt); + info &= ~PROCESS_EE_MODEWRITET; + } + _deleteMMXreg(MMX_FPU+_Fd_, 2); + xmmregs[mmregt].inuse = 1; + xmmregs[mmregt].reg = _Fd_; + 
xmmregs[mmregt].mode = readd; + mmregd = mmregt; + } + else if( (xmminfo&XMMINFO_READS) && mmregs >= 0 && (FPUINST_LASTUSE(_Fs_) || !FPUINST_ISLIVE(_Fs_)) ) { + if( FPUINST_ISLIVE(_Fs_) ) { + _freeXMMreg(mmregs); + info &= ~PROCESS_EE_MODEWRITES; + } + _deleteMMXreg(MMX_FPU+_Fd_, 2); + xmmregs[mmregs].inuse = 1; + xmmregs[mmregs].reg = _Fd_; + xmmregs[mmregs].mode = readd; + mmregd = mmregs; + } + else if( (xmminfo&XMMINFO_READACC) && mmregacc >= 0 && (FPUINST_LASTUSE(XMMFPU_ACC) || !FPUINST_ISLIVE(XMMFPU_ACC)) ) { + if( FPUINST_ISLIVE(XMMFPU_ACC) ) + _freeXMMreg(mmregacc); + _deleteMMXreg(MMX_FPU+_Fd_, 2); + xmmregs[mmregacc].inuse = 1; + xmmregs[mmregacc].reg = _Fd_; + xmmregs[mmregacc].mode = readd; + xmmregs[mmregacc].type = XMMTYPE_FPREG; + mmregd = mmregacc; + } + else mmregd = _allocFPtoXMMreg(-1, _Fd_, readd); + } + } + + assert( mmregs >= 0 || mmregt >= 0 || mmregd >= 0 || mmregacc >= 0 ); + + if( xmminfo & XMMINFO_WRITED ) { + assert( mmregd >= 0 ); + info |= PROCESS_EE_SET_D(mmregd); + } + if( xmminfo & (XMMINFO_WRITEACC|XMMINFO_READACC) ) { + if( mmregacc >= 0 ) info |= PROCESS_EE_SET_ACC(mmregacc)|PROCESS_EE_ACC; + else assert( !(xmminfo&XMMINFO_WRITEACC)); + } + + if( xmminfo & XMMINFO_READS ) { + if( mmregs >= 0 ) info |= PROCESS_EE_SET_S(mmregs)|PROCESS_EE_S; + } + if( xmminfo & XMMINFO_READT ) { + if( mmregt >= 0 ) info |= PROCESS_EE_SET_T(mmregt)|PROCESS_EE_T; + } + + // at least one must be in xmm + if( (xmminfo & (XMMINFO_READS|XMMINFO_READT)) == (XMMINFO_READS|XMMINFO_READT) ) { + assert( mmregs >= 0 || mmregt >= 0 ); + } + + xmmcode(info); + _clearNeededXMMregs(); + return; + } + + if( xmminfo & XMMINFO_READS ) _deleteFPtoXMMreg(_Fs_, 0); + if( xmminfo & XMMINFO_READT ) _deleteFPtoXMMreg(_Ft_, 0); + if( xmminfo & (XMMINFO_READD|XMMINFO_WRITED) ) _deleteFPtoXMMreg(_Fd_, 0); + if( xmminfo & (XMMINFO_READACC|XMMINFO_WRITEACC) ) _deleteFPtoXMMreg(XMMFPU_ACC, 0); + fpucode(0); +} + +#undef _Ft_ +#undef _Fs_ +#undef _Fd_ + 
+//////////////////////////////////////////////////// +extern u8 g_MACFlagTransform[256]; // for vus + +u32 g_sseMXCSR = 0x9fc0; // disable all exception, round to 0, flush to 0 +u32 g_sseVUMXCSR = 0xff80; + +void SetCPUState(u32 sseMXCSR, u32 sseVUMXCSR) +{ + // SSE STATE // + // WARNING: do not touch unless you know what you are doing + + if( cpucaps.hasStreamingSIMDExtensions ) { + g_sseMXCSR = sseMXCSR; + g_sseVUMXCSR = sseVUMXCSR; + // do NOT set Denormals-Are-Zero flag (charlie and chocfac messes up) + // Update 11/05/08 - Doesnt seem to effect it anymore, for the speed boost, its on :p + //g_sseMXCSR = 0x9f80; // changing the rounding mode to 0x2000 (near) kills grandia III! + // changing the rounding mode to 0x0000 or 0x4000 totally kills gitaroo + // so... grandia III wins (you can change individual games with the 'roundmode' patch command) + +#ifdef _MSC_VER + __asm ldmxcsr g_sseMXCSR; // set the new sse control +#else + __asm__("ldmxcsr %0" : : "m"(g_sseMXCSR) ); +#endif + //g_sseVUMXCSR = g_sseMXCSR|0x6000; + } +} + +#define REC_CACHEMEM 0x01000000 + +int recInit( void ) +{ + int i; + const u8 macarr[16] = {0, 8, 4, 12, 2, 10, 6, 14, 1, 9, 5, 13, 3, 11, 7, 15 }; + + recLUT = (uptr*) _aligned_malloc( 0x010000 * sizeof(uptr), 16 ); + memset( recLUT, 0, 0x010000 * sizeof(uptr) ); + + // can't have upper 4 bits nonzero! 
+ recMem = (char*)SysMmap(0x0d000000, REC_CACHEMEM); + + // 32 alignment necessary + recRAM = (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x02000000 , 4*sizeof(BASEBLOCK)); + recROM = (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x00400000 , 4*sizeof(BASEBLOCK)); + recROM1= (BASEBLOCK*) _aligned_malloc( sizeof(BASEBLOCK)/4*0x00040000 , 4*sizeof(BASEBLOCK)); + recBlocks = (BASEBLOCKEX*) _aligned_malloc( sizeof(BASEBLOCKEX)*EE_NUMBLOCKS, 16); + recStack = (char*)malloc( RECSTACK_SIZE ); + + s_nInstCacheSize = 128; + s_pInstCache = (EEINST*)malloc( sizeof(EEINST) * s_nInstCacheSize ); + + if ( recBlocks == NULL || recRAM == NULL || recROM == NULL || recROM1 == NULL || recMem == NULL || recLUT == NULL ) { + SysMessage( _( "Error allocating memory" ) ); + return -1; + } + + for ( i = 0x0000; i < 0x0200; i++ ) + { + recLUT[ i + 0x0000 ] = (uptr)&recRAM[ i << 14 ]; + recLUT[ i + 0x2000 ] = (uptr)&recRAM[ i << 14 ]; + recLUT[ i + 0x3000 ] = (uptr)&recRAM[ i << 14 ]; + } + + for ( i = 0x0000; i < 0x0040; i++ ) + { + recLUT[ i + 0x1fc0 ] = (uptr)&recROM[ i << 14 ]; + recLUT[ i + 0x9fc0 ] = (uptr)&recROM[ i << 14 ]; + recLUT[ i + 0xbfc0 ] = (uptr)&recROM[ i << 14 ]; + } + + for ( i = 0x0000; i < 0x0004; i++ ) + { + recLUT[ i + 0x1e00 ] = (uptr)&recROM1[ i << 14 ]; + recLUT[ i + 0x9e00 ] = (uptr)&recROM1[ i << 14 ]; + recLUT[ i + 0xbe00 ] = (uptr)&recROM1[ i << 14 ]; + } + + memcpy( recLUT + 0x8000, recLUT, 0x2000 * sizeof(uptr) ); + memcpy( recLUT + 0xa000, recLUT, 0x2000 * sizeof(uptr) ); + + memset(recMem, 0xcd, REC_CACHEMEM); + memset(recStack, 0, RECSTACK_SIZE); + + // SSE3 detection, manually create the code + x86SetPtr(recMem); + SSE3_MOVSLDUP_XMM_to_XMM(XMM0, XMM0); + RET(); + + cpudetectSSE3(recMem); + + x86SetPtr(recMem); + SSE4_DPPS_XMM_to_XMM(XMM0, XMM0, 0); + RET(); + + cpudetectSSE4(recMem); + + SysPrintf( "x86Init: \n" ); + SysPrintf( "\tCPU vender name = %s\n", cpuinfo.x86ID ); + SysPrintf( "\tFamilyID = %x\n", cpuinfo.x86StepID ); + SysPrintf( 
"\tx86Family = %s\n", cpuinfo.x86Fam ); + SysPrintf( "\tCPU speed = %d.%03d Ghz\n", cpuinfo.cpuspeed / 1000, cpuinfo.cpuspeed%1000); + SysPrintf( "\tx86PType = %s\n", cpuinfo.x86Type ); + SysPrintf( "\tx86Flags = %8.8x %8.8x\n", cpuinfo.x86Flags, cpuinfo.x86Flags2 ); + SysPrintf( "\tx86EFlags = %8.8x\n", cpuinfo.x86EFlags ); + SysPrintf( "Features: \n" ); + SysPrintf( "\t%sDetected MMX\n", cpucaps.hasMultimediaExtensions ? "" : "Not " ); + SysPrintf( "\t%sDetected SSE\n", cpucaps.hasStreamingSIMDExtensions ? "" : "Not " ); + SysPrintf( "\t%sDetected SSE2\n", cpucaps.hasStreamingSIMD2Extensions ? "" : "Not " ); + SysPrintf( "\t%sDetected SSE3\n", cpucaps.hasStreamingSIMD3Extensions ? "" : "Not " ); + SysPrintf( "\t%sDetected SSE4.1\n", cpucaps.hasStreamingSIMD4Extensions ? "" : "Not " ); + + if ( cpuinfo.x86ID[0] == 'A' ) //AMD cpu + { + SysPrintf( " Extented AMD Features: \n" ); + SysPrintf( "\t%sDetected MMX2\n", cpucaps.hasMultimediaExtensionsExt ? "" : "Not " ); + SysPrintf( "\t%sDetected 3DNOW\n", cpucaps.has3DNOWInstructionExtensions ? "" : "Not " ); + SysPrintf( "\t%sDetected 3DNOW2\n", cpucaps.has3DNOWInstructionExtensionsExt ? 
"" : "Not " ); + } + if ( !( cpucaps.hasMultimediaExtensions ) ) + { + SysMessage( _( "Processor doesn't supports MMX, can't run recompiler without that" ) ); + return -1; + } + + x86FpuState = FPU_STATE; + + SuperVUInit(-1); + + for(i = 0; i < 256; ++i) { + g_MACFlagTransform[i] = macarr[i>>4]|(macarr[i&15]<<4); + } + + SetCPUState(g_sseMXCSR, g_sseVUMXCSR); + + return 0; +} + +//////////////////////////////////////////////////// +void recReset( void ) { +#ifdef PCSX2_DEVBUILD + SysPrintf("EE Recompiler data reset\n"); +#endif + + s_nNextBlock = 0; + maxrecmem = 0; + memset( recRAM, 0, sizeof(BASEBLOCK)/4*0x02000000 ); + memset( recROM, 0, sizeof(BASEBLOCK)/4*0x00400000 ); + memset( recROM1, 0, sizeof(BASEBLOCK)/4*0x00040000 ); + memset( recBlocks, 0, sizeof(BASEBLOCKEX)*EE_NUMBLOCKS ); + if( s_pInstCache ) memset( s_pInstCache, 0, sizeof(EEINST)*s_nInstCacheSize ); + ResetBaseBlockEx(0); + +#ifdef _MSC_VER + __asm emms; +#else + __asm__("emms"); +#endif + +#ifdef _DEBUG + // don't clear since save states won't work + //memset(recMem, 0xcd, REC_CACHEMEM); +#endif + + recPtr = recMem; + recStackPtr = recStack; + x86FpuState = FPU_STATE; + iCWstate = 0; + + branch = 0; +} + +void recShutdown( void ) +{ + if ( recMem == NULL ) { + return; + } + + _aligned_free( recLUT ); + SysMunmap((uptr)recMem, REC_CACHEMEM); recMem = NULL; + _aligned_free( recRAM ); recRAM = NULL; + _aligned_free( recROM ); recROM = NULL; + _aligned_free( recROM1 ); recROM1 = NULL; + _aligned_free( recBlocks ); recBlocks = NULL; + free( s_pInstCache ); s_pInstCache = NULL; s_nInstCacheSize = 0; + + SuperVUDestroy(-1); + + x86Shutdown( ); +} + +void recEnableVU0micro(int enable) { +} + +void recEnableVU1micro(int enable) { +} + +#pragma warning(disable:4731) // frame pointer register 'ebp' modified by inline assembly code +static u32 s_uSaveESP = 0, s_uSaveEBP; + +static void execute( void ) +{ +#ifdef _DEBUG + u8* fnptr; + u32 oldesi; +#else + R5900FNPTR pfn; +#endif + BASEBLOCK* pblock = 
PC_GETBLOCK(cpuRegs.pc); + + if ( !pblock->pFnptr || pblock->startpc != cpuRegs.pc ) { + recRecompile(cpuRegs.pc); + } + + assert( pblock->pFnptr != 0 ); + g_EEFreezeRegs = 1; + + // skip the POPs +#ifdef _DEBUG + fnptr = (u8*)pblock->pFnptr; + +#ifdef _MSC_VER + __asm { + // save data + mov oldesi, esi + mov s_uSaveESP, esp + sub s_uSaveESP, 8 + mov s_uSaveEBP, ebp + push ebp + + call fnptr // jump into function + // restore data + pop ebp + mov esi, oldesi + } +#else + + __asm__("movl %%esi, %0\n" + "movl %%esp, %1\n" + "sub $8, %1\n" + "push %%ebp\n" + "call *%2\n" + "pop %%ebp\n" + "movl %0, %%esi\n" : "=m"(oldesi), "=m"(s_uSaveESP) : "c"(fnptr) ); +#endif // _MSC_VER + +#else + +#ifdef _MSC_VER + pfn = ((R5900FNPTR)pblock->pFnptr); + // use call instead of pfn() + __asm call pfn; +#else + ((R5900FNPTR)pblock->pFnptr)(); +#endif + +#endif + + g_EEFreezeRegs = 0; +} + +void recStep( void ) { +} + +void recExecute( void ) { + //SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_HIGHEST); + //SetPriorityClass(GetCurrentProcess(), HIGH_PRIORITY_CLASS);//ABOVE_NORMAL_PRIORITY_CLASS); + //SetThreadAffinityMask(GetCurrentThread(), 0); + if( Config.Options & PCSX2_EEREC ) Config.Options |= PCSX2_COP2REC; + + for (;;) + execute(); +} + +void recExecuteBlock( void ) { + execute(); +} + +//////////////////////////////////////////////////// +extern u32 g_nextBranchCycle; + +u32 g_lastpc = 0; +u32 g_EEDispatchTemp; +u32 s_pCurBlock_ltime; + +#ifdef _MSC_VER + +// jumped to when invalid pc address +__declspec(naked,noreturn) void Dispatcher() +{ + // EDX contains the jump addr to modify + __asm push edx + + // calc PC_GETBLOCK + s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); + + __asm { + mov eax, s_pDispatchBlock + + // check if startpc == cpuRegs.pc + mov ecx, cpuRegs.pc + //and ecx, 0x5fffffff // remove higher bits + cmp ecx, dword ptr [eax+BLOCKTYPE_STARTPC] + je CheckPtr + + // recompile + push cpuRegs.pc // pc + call recRecompile + add esp, 4 // pop old param + mov 
eax, s_pDispatchBlock +CheckPtr: + mov eax, dword ptr [eax] + } + +#ifdef _DEBUG + __asm mov g_EEDispatchTemp, eax + assert( g_EEDispatchTemp ); +#endif + +// __asm { +// test eax, 0x40000000 // BLOCKTYPE_NEEDCLEAR +// jz Done +// // move new pc +// and eax, 0x0fffffff +// mov ecx, cpuRegs.pc +// mov dword ptr [eax+1], ecx +// } + __asm { + and eax, 0x0fffffff + mov edx, eax + pop ecx // x86Ptr to mod + sub edx, ecx + sub edx, 4 + mov dword ptr [ecx], edx + + jmp eax + } +} + +__declspec(naked,noreturn) void DispatcherClear() +{ + // EDX contains the current pc + __asm mov cpuRegs.pc, edx + __asm push edx + + // calc PC_GETBLOCK + s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); + + if( s_pDispatchBlock->startpc == cpuRegs.pc ) { + assert( s_pDispatchBlock->pFnptr != 0 ); + + // already modded the code, jump to the new place + __asm { + pop edx + add esp, 4 // ignore stack + mov eax, s_pDispatchBlock + mov eax, dword ptr [eax] + and eax, 0x0fffffff + jmp eax + } + } + + __asm { + call recRecompile + add esp, 4 // pop old param + mov eax, s_pDispatchBlock + mov eax, dword ptr [eax] + + pop ecx // old fnptr + + and eax, 0x0fffffff + mov byte ptr [ecx], 0xe9 // jmp32 + mov edx, eax + sub edx, ecx + sub edx, 5 + mov dword ptr [ecx+1], edx + + jmp eax + } +} + +// called when jumping to variable pc address +__declspec(naked,noreturn) void DispatcherReg() +{ + __asm { + //s_pDispatchBlock = PC_GETBLOCK(cpuRegs.pc); + mov edx, cpuRegs.pc + mov ecx, edx + } + + __asm { + shr edx, 14 + and edx, 0xfffffffc + add edx, recLUT + mov edx, dword ptr [edx] + + mov eax, ecx + and eax, 0xfffc + // edx += 2*eax + shl eax, 1 + add edx, eax + + // check if startpc == cpuRegs.pc + mov eax, ecx + //and eax, 0x5fffffff // remove higher bits + cmp eax, dword ptr [edx+BLOCKTYPE_STARTPC] + jne recomp + + mov eax, dword ptr [edx] + } + +#ifdef _DEBUG + __asm mov g_EEDispatchTemp, eax + assert( g_EEDispatchTemp ); +#endif + + __asm { + and eax, 0x0fffffff + jmp eax // fnptr + +recomp: + sub esp, 8 
+ mov dword ptr [esp+4], edx + mov dword ptr [esp], ecx + call recRecompile + mov edx, dword ptr [esp+4] + add esp, 8 + + mov eax, dword ptr [edx] + and eax, 0x0fffffff + jmp eax // fnptr + } +} + +#ifdef PCSX2_DEVBUILD +__declspec(naked) void _StartPerfCounter() +{ + __asm { + push eax + push ebx + push ecx + + rdtsc + mov dword ptr [offset lbase], eax + mov dword ptr [offset lbase + 4], edx + + pop ecx + pop ebx + pop eax + ret + } +} + +__declspec(naked) void _StopPerfCounter() +{ + __asm { + push eax + push ebx + push ecx + + rdtsc + + sub eax, dword ptr [offset lbase] + sbb edx, dword ptr [offset lbase + 4] + mov ecx, s_pCurBlock_ltime + add eax, dword ptr [ecx] + adc edx, dword ptr [ecx + 4] + mov dword ptr [ecx], eax + mov dword ptr [ecx + 4], edx + pop ecx + pop ebx + pop eax + ret + } +} + +#endif // PCSX2_DEVBUILD + +#else // _MSC_VER + +extern void Dispatcher(); +extern void DispatcherClear(); +extern void DispatcherReg(); +extern void _StartPerfCounter(); +extern void _StopPerfCounter(); + +#endif + +#ifdef PCSX2_DEVBUILD +void StartPerfCounter() +{ +#ifdef PCSX2_DEVBUILD + if( s_startcount ) { + CALLFunc((u32)_StartPerfCounter); + } +#endif +} + +void StopPerfCounter() +{ +#ifdef PCSX2_DEVBUILD + if( s_startcount ) { + MOV32ItoM((u32)&s_pCurBlock_ltime, (u32)&s_pCurBlockEx->ltime); + CALLFunc((u32)_StopPerfCounter); + } +#endif +} +#endif + +//////////////////////////////////////////////////// +void recClear64(BASEBLOCK* p) +{ + int left = 4 - ((u32)p % 16)/sizeof(BASEBLOCK); + recClearMem(p); + + if( left > 1 && *(u32*)(p+1) ) recClearMem(p+1); +} + +void recClear128(BASEBLOCK* p) +{ + int left = 4 - ((u32)p % 32)/sizeof(BASEBLOCK); + recClearMem(p); + + if( left > 1 && *(u32*)(p+1) ) recClearMem(p+1); + if( left > 2 && *(u32*)(p+2) ) recClearMem(p+2); + if( left > 3 && *(u32*)(p+3) ) recClearMem(p+3); +} + +void recClear( u32 Addr, u32 Size ) +{ + u32 i; + for(i = 0; i < Size; ++i, Addr+=4) { + REC_CLEARM(Addr); + } +} + +#define EE_MIN_BLOCK_BYTES 
15 + +void recClearMem(BASEBLOCK* p) +{ + BASEBLOCKEX* pexblock; + BASEBLOCK* pstart; + int lastdelay; + + // necessary since recompiler doesn't call femms/emms +#ifdef _MSC_VER + if (cpucaps.has3DNOWInstructionExtensions) __asm femms; + else __asm emms; +#else + if( cpucaps.has3DNOWInstructionExtensions )__asm__("femms"); + else __asm__("emms"); +#endif + + assert( p != NULL ); + + if( p->uType & BLOCKTYPE_DELAYSLOT ) { + recClearMem(p-1); + if( p->pFnptr == 0 ) + return; + } + + assert( p->pFnptr != 0 ); + assert( p->startpc ); + + x86Ptr = (s8*)p->pFnptr; + + // there is a small problem: mem can be ored with 0xa<<28 or 0x8<<28, and don't know which + MOV32ItoR(EDX, p->startpc); + PUSH32I((u32)x86Ptr); // will be replaced by JMP32 + JMP32((u32)DispatcherClear - ( (u32)x86Ptr + 5 )); + assert( x86Ptr == (s8*)p->pFnptr + EE_MIN_BLOCK_BYTES ); + + pstart = PC_GETBLOCK(p->startpc); + pexblock = PC_GETBLOCKEX(pstart); + assert( pexblock->startpc == pstart->startpc ); + + if( pexblock->startpc != pstart->startpc ) { + // some bug with ffx after beating a big snake in sewers + RemoveBaseBlockEx(pexblock, 0); + pexblock->size = 0; + pexblock->startpc = 0; + return; + } + +// if( pexblock->pOldFnptr ) { +// // have to mod oldfnptr too +// x86Ptr = pexblock->pOldFnptr; +// +// MOV32ItoR(EDX, p->startpc); +// JMP32((u32)DispatcherClear - ( (u32)x86Ptr + 5 )); +// } +// else +// pexblock->pOldFnptr = (u8*)p->pFnptr; + + // don't delete if last is delay + lastdelay = pexblock->size; + if( pstart[pexblock->size-1].uType & BLOCKTYPE_DELAYSLOT ) { + assert( pstart[pexblock->size-1].pFnptr != pstart->pFnptr ); + if( pstart[pexblock->size-1].pFnptr != 0 ) { + pstart[pexblock->size-1].uType = 0; + --lastdelay; + } + } + + memset(pstart, 0, lastdelay*sizeof(BASEBLOCK)); + + RemoveBaseBlockEx(pexblock, 0); + pexblock->size = 0; + pexblock->startpc = 0; +} + +// check for end of bios +void CheckForBIOSEnd() +{ + MOV32MtoR(EAX, (int)&cpuRegs.pc); + + CMP32ItoR(EAX, 0x00200008); + 
j8Ptr[0] = JE8(0); + + CMP32ItoR(EAX, 0x00100008); + j8Ptr[1] = JE8(0); + + // return + j8Ptr[2] = JMP8(0); + + x86SetJ8( j8Ptr[0] ); + x86SetJ8( j8Ptr[1] ); + + // bios end + RET2(); + + x86SetJ8( j8Ptr[2] ); +} + +static int *s_pCode; + +void SetBranchReg( u32 reg ) +{ + branch = 1; + + if( reg != 0xffffffff ) { +// if( GPR_IS_CONST1(reg) ) +// MOV32ItoM( (u32)&cpuRegs.pc, g_cpuConstRegs[reg].UL[0] ); +// else { +// int mmreg; +// +// if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, reg, MODE_READ)) >= 0 ) { +// SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.pc, mmreg); +// } +// else if( (mmreg = _checkMMXreg(MMX_GPR+reg, MODE_READ)) >= 0 ) { +// MOVDMMXtoM((u32)&cpuRegs.pc, mmreg); +// SetMMXstate(); +// } +// else { +// MOV32MtoR(EAX, (int)&cpuRegs.GPR.r[ reg ].UL[ 0 ] ); +// MOV32RtoM((u32)&cpuRegs.pc, EAX); +// } +// } + _allocX86reg(ESI, X86TYPE_PCWRITEBACK, 0, MODE_WRITE); + _eeMoveGPRtoR(ESI, reg); + + recompileNextInstruction(1); + + if( x86regs[ESI].inuse ) { + assert( x86regs[ESI].type == X86TYPE_PCWRITEBACK ); + MOV32RtoM((int)&cpuRegs.pc, ESI); + x86regs[ESI].inuse = 0; + } + else { + MOV32MtoR(EAX, (u32)&g_recWriteback); + MOV32RtoM((int)&cpuRegs.pc, EAX); + } + } + +// CMP32ItoM((u32)&cpuRegs.pc, 0); +// j8Ptr[5] = JNE8(0); +// CALLFunc((u32)tempfn); +// x86SetJ8( j8Ptr[5] ); + + iFlushCall(FLUSH_EVERYTHING); + + iBranchTest(0xffffffff, 1); + if( bExecBIOS ) CheckForBIOSEnd(); + + JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); +} + +void SetBranchImm( u32 imm ) +{ + u32* ptr; + branch = 1; + + assert( imm ); + + // end the current block + MOV32ItoM( (u32)&cpuRegs.pc, imm ); + iFlushCall(FLUSH_EVERYTHING); + + iBranchTest(imm, imm <= pc); + if( bExecBIOS ) CheckForBIOSEnd(); + + MOV32ItoR(EDX, 0); + ptr = (u32*)(x86Ptr-4); + *ptr = (u32)JMP32((u32)Dispatcher - ( (u32)x86Ptr + 5 )); +} + +void SaveBranchState() +{ + s_savex86FpuState = x86FpuState; + s_saveiCWstate = iCWstate; + s_savenBlockCycles = s_nBlockCycles; + s_saveConstGPRreg = 0xffffffff; // indicate 
searching + s_saveHasConstReg = g_cpuHasConstReg; + s_saveFlushedConstReg = g_cpuFlushedConstReg; + s_psaveInstInfo = g_pCurInstInfo; + s_saveRegHasLive1 = g_cpuRegHasLive1; + s_saveRegHasSignExt = g_cpuRegHasSignExt; + + // save all mmx regs + memcpy(s_saveMMXregs, mmxregs, sizeof(mmxregs)); + memcpy(s_saveXMMregs, xmmregs, sizeof(xmmregs)); +} + +void LoadBranchState() +{ + x86FpuState = s_savex86FpuState; + iCWstate = s_saveiCWstate; + s_nBlockCycles = s_savenBlockCycles; + + if( s_saveConstGPRreg != 0xffffffff ) { + assert( s_saveConstGPRreg > 0 ); + + // make sure right GPR was saved + assert( g_cpuHasConstReg == s_saveHasConstReg || (g_cpuHasConstReg ^ s_saveHasConstReg) == (1<visited, 1 ); + } +#endif + +#ifdef _DEBUG + //CALLFunc((u32)testfpu); +#endif + + if( !USE_FAST_BRANCHES || cpuBranch ) { + MOV32MtoR(ECX, (int)&cpuRegs.cycle); + ADD32ItoR(ECX, s_nBlockCycles*EECYCLE_MULT); // NOTE: mulitply cycles here, 6/5 ratio stops pal ffx from randomly crashing, but crashes jakI + MOV32RtoM((int)&cpuRegs.cycle, ECX); // update cycles + } + else { + ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); + return; + } + + SUB32MtoR(ECX, (int)&g_nextBranchCycle); + + // check if should branch + j8Ptr[0] = JS8( 0 ); + + // has to be in the middle of Save/LoadBranchState + CALLFunc( (int)cpuBranchTest ); + + if( newpc != 0xffffffff ) { + CMP32ItoM((int)&cpuRegs.pc, newpc); + JNE32((u32)DispatcherReg - ( (u32)x86Ptr + 6 )); + } + + x86SetJ8( j8Ptr[0] ); +} + + +//////////////////////////////////////////////////// +#ifndef CP2_RECOMPILE + +REC_SYS(COP2); + +#else + +void recCOP2( void ) +{ +#ifdef CPU_LOG + CPU_LOG( "Recompiling COP2:%s\n", disR5900Fasm( cpuRegs.code, cpuRegs.pc ) ); +#endif + + if ( !cpucaps.hasStreamingSIMDExtensions ) { + MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); + MOV32ItoM( (u32)&cpuRegs.pc, pc ); + iFlushCall(FLUSH_EVERYTHING); + g_cpuHasConstReg = 1; // reset all since COP2 can change regs + CALLFunc( (u32)COP2 ); + + 
CMP32ItoM((int)&cpuRegs.pc, pc); + j8Ptr[0] = JE8(0); + ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); + JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); + x86SetJ8(j8Ptr[0]); + } + else + { + recCOP22( ); + } +} + +#endif + +//////////////////////////////////////////////////// +void recSYSCALL( void ) { + MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); + MOV32ItoM( (u32)&cpuRegs.pc, pc ); + iFlushCall(FLUSH_NODESTROY); + CALLFunc( (u32)SYSCALL ); + + CMP32ItoM((int)&cpuRegs.pc, pc); + j8Ptr[0] = JE8(0); + ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); + JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); + x86SetJ8(j8Ptr[0]); + //branch = 2; +} + +//////////////////////////////////////////////////// +void recBREAK( void ) { + MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); + MOV32ItoM( (u32)&cpuRegs.pc, pc ); + iFlushCall(FLUSH_EVERYTHING); + CALLFunc( (u32)BREAK ); + + CMP32ItoM((int)&cpuRegs.pc, pc); + j8Ptr[0] = JE8(0); + ADD32ItoM((u32)&cpuRegs.cycle, s_nBlockCycles); + RET(); + x86SetJ8(j8Ptr[0]); + //branch = 2; +} + +//////////////////////////////////////////////////// +//static void recCACHE( void ) { +// MOV32ItoM( (u32)&cpuRegs.code, cpuRegs.code ); +// MOV32ItoM( (u32)&cpuRegs.pc, pc ); +// iFlushCall(FLUSH_EVERYTHING); +// CALLFunc( (u32)CACHE ); +// //branch = 2; +// +// CMP32ItoM((int)&cpuRegs.pc, pc); +// j8Ptr[0] = JE8(0); +// RET(); +// x86SetJ8(j8Ptr[0]); +//} + + +void recPREF( void ) +{ +} + +void recSYNC( void ) +{ +} + +void recMFSA( void ) +{ + int mmreg; + if (!_Rd_) return; + + mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rd_, MODE_WRITE); + if( mmreg >= 0 ) { + SSE_MOVLPS_M64_to_XMM(mmreg, (u32)&cpuRegs.sa); + } + else if( (mmreg = _checkMMXreg(MMX_GPR+_Rd_, MODE_WRITE)) >= 0 ) { + MOVDMtoMMX(mmreg, (u32)&cpuRegs.sa); + SetMMXstate(); + } + else { + MOV32MtoR(EAX, (u32)&cpuRegs.sa); + _deleteEEreg(_Rd_, 0); + MOV32RtoM((u32)&cpuRegs.GPR.r[_Rd_].UL[0], EAX); + MOV32ItoM((u32)&cpuRegs.GPR.r[_Rd_].UL[1], 0); + } +} + +void recMTSA( void ) +{ + if( 
GPR_IS_CONST1(_Rs_) ) { + MOV32ItoM((u32)&cpuRegs.sa, g_cpuConstRegs[_Rs_].UL[0] ); + } + else { + int mmreg; + + if( (mmreg = _checkXMMreg(XMMTYPE_GPRREG, _Rs_, MODE_READ)) >= 0 ) { + SSE_MOVSS_XMM_to_M32((u32)&cpuRegs.sa, mmreg); + } + else if( (mmreg = _checkMMXreg(MMX_GPR+_Rs_, MODE_READ)) >= 0 ) { + MOVDMMXtoM((u32)&cpuRegs.sa, mmreg); + SetMMXstate(); + } + else { + MOV32MtoR(EAX, (u32)&cpuRegs.GPR.r[_Rs_].UL[0]); + MOV32RtoM((u32)&cpuRegs.sa, EAX); + } + } +} + +void recMTSAB( void ) +{ + if( GPR_IS_CONST1(_Rs_) ) { + MOV32ItoM((u32)&cpuRegs.sa, ((g_cpuConstRegs[_Rs_].UL[0] & 0xF) ^ (_Imm_ & 0xF)) << 3); + } + else { + _eeMoveGPRtoR(EAX, _Rs_); + AND32ItoR(EAX, 0xF); + XOR32ItoR(EAX, _Imm_&0xf); + SHL32ItoR(EAX, 3); + MOV32RtoM((u32)&cpuRegs.sa, EAX); + } +} + +void recMTSAH( void ) +{ + if( GPR_IS_CONST1(_Rs_) ) { + MOV32ItoM((u32)&cpuRegs.sa, ((g_cpuConstRegs[_Rs_].UL[0] & 0x7) ^ (_Imm_ & 0x7)) << 4); + } + else { + _eeMoveGPRtoR(EAX, _Rs_); + AND32ItoR(EAX, 0x7); + XOR32ItoR(EAX, _Imm_&0x7); + SHL32ItoR(EAX, 4); + MOV32RtoM((u32)&cpuRegs.sa, EAX); + } +} + +static void checkcodefn() +{ + int pctemp; + +#ifdef _MSC_VER + __asm mov pctemp, eax; +#else + __asm__("movl %%eax, %0" : "=m"(pctemp) ); +#endif + + SysPrintf("code changed! 
%x\n", pctemp); + assert(0); +} + +void checkpchanged(u32 startpc) +{ + assert(0); +} + +//#ifdef _DEBUG +//#define CHECK_XMMCHANGED() CALLFunc((u32)checkxmmchanged); +//#else +//#define CHECK_XMMCHANGED() +//#endif +// +//static void checkxmmchanged() +//{ +// assert( !g_globalMMXSaved ); +// assert( !g_globalXMMSaved ); +//} + +u32 recompileCodeSafe(u32 temppc) +{ + BASEBLOCK* pblock = PC_GETBLOCK(temppc); + + if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { + if( pc == pblock->startpc ) + return 0; + } + + return 1; +} + +void recompileNextInstruction(int delayslot) +{ + static u8 s_bFlushReg = 1; + int i, count; + + BASEBLOCK* pblock = PC_GETBLOCK(pc); + + // need *ppblock != s_pCurBlock because of branches + if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { + + if( !delayslot && pc == pblock->startpc ) { + // code already in place, so jump to it and exit recomp + assert( PC_GETBLOCKEX(pblock)->startpc == pblock->startpc ); + + iFlushCall(FLUSH_EVERYTHING); + MOV32ItoM((u32)&cpuRegs.pc, pc); + +// if( pexblock->pOldFnptr ) { +// // code already in place, so jump to it and exit recomp +// JMP32((u32)pexblock->pOldFnptr - ((u32)x86Ptr + 5)); +// branch = 3; +// return; +// } + + JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5)); + branch = 3; + return; + } + else { + + if( !(delayslot && pblock->startpc == pc) ) { + s8* oldX86 = x86Ptr; + //__Log("clear block %x\n", pblock->startpc); + recClearMem(pblock); + x86Ptr = oldX86; + if( delayslot ) + SysPrintf("delay slot %x\n", pc); + } + } + } + + if( delayslot ) + pblock->uType = BLOCKTYPE_DELAYSLOT; + + s_pCode = (int *)PSM( pc ); + assert(s_pCode); + +#ifdef _DEBUG + MOV32ItoR(EAX, pc); +#endif + + cpuRegs.code = *(int *)s_pCode; + s_nBlockCycles++; + pc += 4; + +//#ifdef _DEBUG +// CMP32ItoM((u32)s_pCode, cpuRegs.code); +// j8Ptr[0] = JE8(0); +// MOV32ItoR(EAX, pc); +// CALLFunc((u32)checkcodefn); +// x86SetJ8( j8Ptr[ 0 ] ); +// +// if( !delayslot ) { +// 
CMP32ItoM((u32)&cpuRegs.pc, s_pCurBlockEx->startpc); +// j8Ptr[0] = JB8(0); +// CMP32ItoM((u32)&cpuRegs.pc, pc); +// j8Ptr[1] = JA8(0); +// j8Ptr[2] = JMP8(0); +// x86SetJ8( j8Ptr[ 0 ] ); +// x86SetJ8( j8Ptr[ 1 ] ); +// PUSH32I(s_pCurBlockEx->startpc); +// CALLFunc((u32)checkpchanged); +// ADD32ItoR(ESP, 4); +// x86SetJ8( j8Ptr[ 2 ] ); +// } +//#endif + + g_pCurInstInfo++; + + // reorder register priorities +// for(i = 0; i < X86REGS; ++i) { +// if( x86regs[i].inuse ) { +// if( count > 0 ) mmxregs[i].counter = 1000-count; +// else mmxregs[i].counter = 0; +// } +// } + + for(i = 0; i < MMXREGS; ++i) { + if( mmxregs[i].inuse ) { + assert( MMX_ISGPR(mmxregs[i].reg) ); + count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, XMMTYPE_GPRREG, mmxregs[i].reg-MMX_GPR); + if( count > 0 ) mmxregs[i].counter = 1000-count; + else mmxregs[i].counter = 0; + } + } + + for(i = 0; i < XMMREGS; ++i) { + if( xmmregs[i].inuse ) { + count = _recIsRegWritten(g_pCurInstInfo, (s_nEndBlock-pc)/4 + 1, xmmregs[i].type, xmmregs[i].reg); + if( count > 0 ) xmmregs[i].counter = 1000-count; + else xmmregs[i].counter = 0; + } + } + + // peephole optimizations + if( g_pCurInstInfo->info & EEINSTINFO_COREC ) { + +#ifdef PCSX2_VIRTUAL_MEM + if( g_pCurInstInfo->numpeeps > 1 ) { + switch(cpuRegs.code>>26) { + case 30: recLQ_coX(g_pCurInstInfo->numpeeps); break; + case 31: recSQ_coX(g_pCurInstInfo->numpeeps); break; + case 49: recLWC1_coX(g_pCurInstInfo->numpeeps); break; + case 57: recSWC1_coX(g_pCurInstInfo->numpeeps); break; + case 55: recLD_coX(g_pCurInstInfo->numpeeps); break; + case 63: recSD_coX(g_pCurInstInfo->numpeeps); break; + default: + assert(0); + } + + pc += g_pCurInstInfo->numpeeps*4; + s_nBlockCycles += g_pCurInstInfo->numpeeps; + g_pCurInstInfo += g_pCurInstInfo->numpeeps; + } + else { + recBSC_co[cpuRegs.code>>26](); + pc += 4; + s_nBlockCycles++; + g_pCurInstInfo++; + } +#else + assert(0); +#endif + } + else { + assert( !(g_pCurInstInfo->info & EEINSTINFO_NOREC) ); + + // 
if this instruction is a jump or a branch, exit right away + if( delayslot ) { + switch(cpuRegs.code>>26) { + case 1: + switch(_Rt_) { + case 0: case 1: case 2: case 3: case 0x10: case 0x11: case 0x12: case 0x13: + SysPrintf("branch %x in delay slot!\n", cpuRegs.code); + _clearNeededX86regs(); + _clearNeededMMXregs(); + _clearNeededXMMregs(); + return; + } + break; + + case 2: case 3: case 4: case 5: case 6: case 7: case 0x14: case 0x15: case 0x16: case 0x17: + SysPrintf("branch %x in delay slot!\n", cpuRegs.code); + _clearNeededX86regs(); + _clearNeededMMXregs(); + _clearNeededXMMregs(); + return; + } + } + recBSC[ cpuRegs.code >> 26 ](); + } + + if( !delayslot ) { + if( s_bFlushReg ) { + //if( !_flushUnusedConstReg() ) { + int flushed = 0; + if( _getNumMMXwrite() > 3 ) flushed = _flushMMXunused(); + if( !flushed && _getNumXMMwrite() > 2 ) _flushXMMunused(); + s_bFlushReg = !flushed; +// } +// else s_bFlushReg = 0; + } + else s_bFlushReg = 1; + } + else s_bFlushReg = 1; + + //CHECK_XMMCHANGED(); + _clearNeededX86regs(); + _clearNeededMMXregs(); + _clearNeededXMMregs(); + +// _freeXMMregs(); +// _freeMMXregs(); +// _flushCachedRegs(); +// g_cpuHasConstReg = 1; +} + +//__declspec(naked) void iDummyBlock() +//{ +//// g_lastpc = cpuRegs.pc; +//// +//// do { +//// cpuRegs.cycle = g_nextBranchCycle; +//// cpuBranchTest(); +//// } while(g_lastpc == cpuRegs.pc); +//// +//// __asm jmp DispatcherReg +// __asm { +//RepDummy: +// add cpuRegs.cycle, 9 +// call cpuBranchTest +// cmp cpuRegs.pc, 0x81fc0 +// je RepDummy +// jmp DispatcherReg +// } +//} + +//////////////////////////////////////////////////// +#include "R3000A.h" +#include "PsxCounters.h" +#include "PsxMem.h" +extern tIPU_BP g_BP; + +extern u32 psxdump; +extern u32 psxNextCounter, psxNextsCounter; +extern void iDumpPsxRegisters(u32 startpc, u32 temp); +extern Counter counters[6]; extern int rdram_devices; // put 8 for TOOL and 2 for PS2 and PSX -extern int rdram_sdevid; - -void iDumpRegisters(u32 startpc, u32 temp) 
-{ - int i; - char* pstr = temp ? "t" : ""; - const u32 dmacs[] = {0x8000, 0x9000, 0xa000, 0xb000, 0xb400, 0xc000, 0xc400, 0xc800, 0xd000, 0xd400 }; - extern char *disRNameGPR[]; - char* psymb; - - psymb = disR5900GetSym(startpc); - - if( psymb != NULL ) - __Log("%sreg(%s): %x %x c:%x\n", pstr, psymb, startpc, cpuRegs.interrupt, cpuRegs.cycle); - else - __Log("%sreg: %x %x c:%x\n", pstr, startpc, cpuRegs.interrupt, cpuRegs.cycle); - for(i = 1; i < 32; ++i) __Log("%s: %x_%x_%x_%x\n", disRNameGPR[i], cpuRegs.GPR.r[i].UL[3], cpuRegs.GPR.r[i].UL[2], cpuRegs.GPR.r[i].UL[1], cpuRegs.GPR.r[i].UL[0]); - //for(i = 0; i < 32; i+=4) __Log("cp%d: %x_%x_%x_%x\n", i, cpuRegs.CP0.r[i], cpuRegs.CP0.r[i+1], cpuRegs.CP0.r[i+2], cpuRegs.CP0.r[i+3]); - //for(i = 0; i < 32; ++i) __Log("%sf%d: %f %x\n", pstr, i, fpuRegs.fpr[i].f, fpuRegs.fprc[i]); - //for(i = 1; i < 32; ++i) __Log("%svf%d: %f %f %f %f, vi: %x\n", pstr, i, VU0.VF[i].F[3], VU0.VF[i].F[2], VU0.VF[i].F[1], VU0.VF[i].F[0], VU0.VI[i].UL); - for(i = 0; i < 32; ++i) __Log("%sf%d: %x %x\n", pstr, i, fpuRegs.fpr[i].UL, fpuRegs.fprc[i]); - for(i = 1; i < 32; ++i) __Log("%svf%d: %x %x %x %x, vi: %x\n", pstr, i, VU0.VF[i].UL[3], VU0.VF[i].UL[2], VU0.VF[i].UL[1], VU0.VF[i].UL[0], VU0.VI[i].UL); - __Log("%svfACC: %x %x %x %x\n", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]); - __Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x\n", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0], - cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]); - __Log("%sCycle: %x %x, Count: %x\n", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count); - iDumpPsxRegisters(psxRegs.pc, temp); - - __Log("f410,30,40: %x %x %x, %d %d\n", psHu32(0xf410), psHu32(0xf430), psHu32(0xf440), rdram_sdevid, rdram_devices); - __Log("cyc11: %x %x; vu0: %x, vu1: %x\n", cpuRegs.sCycle[1], cpuRegs.eCycle[1], VU0.cycle, VU1.cycle); - - __Log("%scounters: %x %x; psx: %x %x\n", pstr, nextsCounter, nextCounter, 
psxNextsCounter, psxNextCounter); - for(i = 0; i < 4; ++i) { - __Log("eetimer%d: count: %x mode: %x target: %x %x; %x %x; %x %x %x %x\n", i, - counters[i].count, counters[i].mode, counters[i].target, counters[i].hold, counters[i].rate, - counters[i].interrupt, counters[i].Cycle, counters[i].sCycle, counters[i].CycleT, counters[i].sCycleT); - } - __Log("VIF0_STAT = %x, VIF1_STAT = %x\n", psHu32(0x3800), psHu32(0x3C00)); - __Log("ipu %x %x %x %x; bp: %x %x %x %x\n", psHu32(0x2000), psHu32(0x2010), psHu32(0x2020), psHu32(0x2030), g_BP.BP, g_BP.bufferhasnew, g_BP.FP, g_BP.IFC); - __Log("gif: %x %x %x\n", psHu32(0x3000), psHu32(0x3010), psHu32(0x3020)); - for(i = 0; i < ARRAYSIZE(dmacs); ++i) { - DMACh* p = (DMACh*)(PS2MEM_HW+dmacs[i]); - __Log("dma%d c%x m%x q%x t%x s%x\n", i, p->chcr, p->madr, p->qwc, p->tadr, p->sadr); - } - __Log("dmac %x %x %x %x\n", psHu32(DMAC_CTRL), psHu32(DMAC_STAT), psHu32(DMAC_RBSR), psHu32(DMAC_RBOR)); - __Log("intc %x %x\n", psHu32(INTC_STAT), psHu32(INTC_MASK)); - __Log("sif: %x %x %x %x %x\n", psHu32(0xf200), psHu32(0xf220), psHu32(0xf230), psHu32(0xf240), psHu32(0xf260)); -} - -extern u32 psxdump; - -static void printfn() -{ - static int lastrec = 0; - static int curcount = 0, count2 = 0; - const int skip = 0; - static int i; - - assert( !g_globalMMXSaved ); - assert( !g_globalXMMSaved ); - - if( (dumplog&2) && g_lastpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) { - curcount++; - - if( curcount > skip ) { - iDumpRegisters(g_lastpc, 1); - curcount = 0; - } - - lastrec = g_lastpc; - } -} - -u32 s_recblocks[] = {0}; - -void badespfn() { - assert(0); - SysPrintf("Bad esp!\n"); -} - -#define OPTIMIZE_COP2 0//CHECK_VU0REC - -void recRecompile( u32 startpc ) -{ - u32 i = 0; - u32 branchTo; - u32 willbranch3 = 0; - u32* ptr; - u32 usecop2; - -#ifdef _DEBUG - //dumplog |= 4; - if( dumplog & 4 ) - iDumpRegisters(startpc, 0); -#endif - - assert( startpc ); - - // if recPtr reached the mem limit reset whole mem - if ( ( (uptr)recPtr - (uptr)recMem ) 
>= REC_CACHEMEM-0x40000 || dumplog == 0xffffffff) { - recReset(); - } - if ( ( (uptr)recStackPtr - (uptr)recStack ) >= RECSTACK_SIZE-0x100 ) { -#ifdef _DEBUG - SysPrintf("stack reset\n"); -#endif - recReset(); - } - - s_pCurBlock = PC_GETBLOCK(startpc); - - if( s_pCurBlock->pFnptr ) { - // clear if already taken - assert( s_pCurBlock->startpc < startpc ); - recClearMem(s_pCurBlock); - } - - if( s_pCurBlock->startpc == startpc ) { - s_pCurBlockEx = PC_GETBLOCKEX(s_pCurBlock); - assert( s_pCurBlockEx->startpc == startpc ); - } - else { - s_pCurBlockEx = NULL; - for(i = 0; i < EE_NUMBLOCKS; ++i) { - if( recBlocks[(i+s_nNextBlock)%EE_NUMBLOCKS].size == 0 ) { - s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%EE_NUMBLOCKS; - s_nNextBlock = (i+s_nNextBlock+1)%EE_NUMBLOCKS; - break; - } - } - - if( s_pCurBlockEx == NULL ) { - //SysPrintf("ee reset (blocks)\n"); - recReset(); - s_nNextBlock = 0; - s_pCurBlockEx = recBlocks; - } - - s_pCurBlockEx->startpc = startpc; - } - - x86SetPtr( recPtr ); - x86Align(16); - recPtr = x86Ptr; - s_pCurBlock->pFnptr = (u32)x86Ptr; - s_pCurBlock->startpc = startpc; - - // slower -// if( startpc == 0x81fc0 ) { -// -// MOV32MtoR(ECX, (u32)&g_nextBranchCycle); -// MOV32RtoM((u32)&cpuRegs.cycle, ECX); -// //ADD32ItoR(ECX, 9); -// //ADD32ItoM((u32)&cpuRegs.cycle, 512); -// CALLFunc((u32)cpuBranchTest); -// CMP32ItoM((u32)&cpuRegs.pc, 0x81fc0); -// JE8(s_pCurBlock->pFnptr - (u32)(x86Ptr+2) ); -// JMP32((u32)DispatcherReg - (u32)(x86Ptr+5)); -// -// pc = startpc + 9*4; -// assert( (pc-startpc)>>2 <= 0xffff ); -// s_pCurBlockEx->size = (pc-startpc)>>2; -// -// for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { -// s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; -// s_pCurBlock[i].startpc = s_pCurBlock->startpc; -// } -// -// // don't overwrite if delay slot -// if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { -// s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; -// s_pCurBlock[i].startpc = s_pCurBlock->startpc; -// } -// -// 
// set the block ptr -// AddBaseBlockEx(s_pCurBlockEx, 0); -// -// if( !(pc&0x10000000) ) -// maxrecmem = max( (pc&~0xa0000000), maxrecmem ); -// -// recPtr = x86Ptr; -// return; -// } - - branch = 0; - - // reset recomp state variables - s_nBlockCycles = 0; - pc = startpc; - x86FpuState = FPU_STATE; - iCWstate = 0; - s_saveConstGPRreg = 0; - g_cpuHasConstReg = g_cpuFlushedConstReg = 1; - g_cpuPrevRegHasLive1 = g_cpuRegHasLive1 = 0xffffffff; - g_cpuPrevRegHasSignExt = g_cpuRegHasSignExt = 0; - _recClearWritebacks(); - assert( g_cpuConstRegs[0].UD[0] == 0 ); - - _initX86regs(); - _initXMMregs(); - _initMMXregs(); - -#ifdef _DEBUG - // for debugging purposes - MOV32ItoM((u32)&g_lastpc, pc); - CALLFunc((u32)printfn); - -// CMP32MtoR(EBP, (u32)&s_uSaveEBP); -// j8Ptr[0] = JE8(0); -// CALLFunc((u32)badespfn); -// x86SetJ8(j8Ptr[0]); -#endif - - // go until the next branch - i = startpc; - s_nEndBlock = 0xffffffff; - s_nHasDelay = 0; - - while(1) { - BASEBLOCK* pblock = PC_GETBLOCK(i); - if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { - - if( i == pblock->startpc ) { - // branch = 3 - willbranch3 = 1; - s_nEndBlock = i; - break; - } - } - - cpuRegs.code = *(int *)PSM(i); - - switch(cpuRegs.code >> 26) { - case 0: // special - - if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR - s_nEndBlock = i + 8; - s_nHasDelay = 1; - goto StartRecomp; - } - - break; - case 1: // regimm - - if( _Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20) ) { - // branches - if( _Rt_ == 2 && _Rt_ == 3 && _Rt_ == 18 && _Rt_ == 19 ) s_nHasDelay = 1; - else s_nHasDelay = 2; - - branchTo = _Imm_ * 4 + i + 4; - if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; - else s_nEndBlock = i+8; - - goto StartRecomp; - } - - break; - - case 2: // J - case 3: // JAL - s_nHasDelay = 1; - s_nEndBlock = i + 8; - goto StartRecomp; - - // branches - case 4: case 5: case 6: case 7: - case 20: case 21: case 22: case 23: - - if( (cpuRegs.code >> 26) >= 20 ) s_nHasDelay = 1; - else s_nHasDelay = 
2; - - branchTo = _Imm_ * 4 + i + 4; - if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; - else s_nEndBlock = i+8; - - goto StartRecomp; - - case 16: // cp0 - if( _Rs_ == 16 ) { - if( _Funct_ == 24 ) { // eret - s_nEndBlock = i+4; - goto StartRecomp; - } - } - - break; - case 17: // cp1 - case 18: // cp2 - if( _Rs_ == 8 ) { - // BC1F, BC1T, BC1FL, BC1TL - // BC2F, BC2T, BC2FL, BC2TL - if( _Rt_ >= 2 ) s_nHasDelay = 1; - else s_nHasDelay = 2; - - branchTo = _Imm_ * 4 + i + 4; - if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; - else s_nEndBlock = i+8; - - goto StartRecomp; - } - break; - } - - i += 4; - } - -StartRecomp: - - // rec info // - { - EEINST* pcur; - - if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { - free(s_pInstCache); - s_nInstCacheSize = (s_nEndBlock-startpc)/4+10; - s_pInstCache = (EEINST*)malloc(sizeof(EEINST)*s_nInstCacheSize); - assert( s_pInstCache != NULL ); - } - - pcur = s_pInstCache + (s_nEndBlock-startpc)/4; - _recClearInst(pcur); - pcur->info = 0; - - for(i = s_nEndBlock; i > startpc; i -= 4 ) { - cpuRegs.code = *(int *)PSM(i-4); - pcur[-1] = pcur[0]; - rpropBSC(pcur-1, pcur); - pcur--; - } - } - - // analyze instructions // - { - usecop2 = 0; - g_pCurInstInfo = s_pInstCache; - - for(i = startpc; i < s_nEndBlock; i += 4) { - g_pCurInstInfo++; - cpuRegs.code = *(u32*)PSM(i); - - // cop2 // - if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { - - if( !usecop2 ) { - // init - if( OPTIMIZE_COP2 ) { - memset(VU0.fmac,0,sizeof(VU0.fmac)); - memset(&VU0.fdiv,0,sizeof(VU0.fdiv)); - memset(&VU0.efu,0,sizeof(VU0.efu)); - } - vucycle = 0; - usecop2 = 1; - } - - VU0.code = cpuRegs.code; - _cop2AnalyzeOp(g_pCurInstInfo, OPTIMIZE_COP2); - continue; - } - - if( usecop2 ) vucycle++; - - // peephole optimizations // -#ifdef PCSX2_VIRTUAL_MEM - if( i < s_nEndBlock-4 && recompileCodeSafe(i) ) { - u32 curcode = cpuRegs.code; - u32 nextcode = *(u32*)PSM(i+4); - if( _eeIsLoadStoreCoIssue(curcode, nextcode) && recBSC_co[curcode>>26] 
!= NULL ) { - - // rs has to be the same, and cannot be just written - if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_eeLoadWritesRs(curcode) ) { - - if( _eeIsLoadStoreCoX(curcode) && ((nextcode>>16)&0x1f) != ((curcode>>21)&0x1f) ) { - // see how many stores there are - u32 j; - // use xmmregs since only supporting lwc1,lq,swc1,sq - for(j = i+8; j < s_nEndBlock && j < i+4*XMMREGS; j += 4 ) { - u32 nncode = *(u32*)PSM(j); - if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) || - _eeLoadWritesRs(nncode)) - break; - } - - if( j > i+8 ) { - u32 num = (j-i)>>2; // number of stores that can coissue - assert( num <= XMMREGS ); - - g_pCurInstInfo[0].numpeeps = num-1; - g_pCurInstInfo[0].info |= EEINSTINFO_COREC; - - while(i < j-4) { - g_pCurInstInfo++; - g_pCurInstInfo[0].info |= EEINSTINFO_NOREC; - i += 4; - } - - continue; - } - - // fall through - } - - // unaligned loadstores - - // if LWL, check if LWR and that offsets are +3 away - switch(curcode >> 26) { - case 0x22: // LWL - if( (nextcode>>26) != 0x26 || ((s16)nextcode)+3 != (s16)curcode ) - continue; - break; - case 0x26: // LWR - if( (nextcode>>26) != 0x22 || ((s16)nextcode) != (s16)curcode+3 ) - continue; - break; - - case 0x2a: // SWL - if( (nextcode>>26) != 0x2e || ((s16)nextcode)+3 != (s16)curcode ) - continue; - break; - case 0x2e: // SWR - if( (nextcode>>26) != 0x2a || ((s16)nextcode) != (s16)curcode+3 ) - continue; - break; - - case 0x1a: // LDL - if( (nextcode>>26) != 0x1b || ((s16)nextcode)+7 != (s16)curcode ) - continue; - break; - case 0x1b: // LWR - if( (nextcode>>26) != 0x1aa || ((s16)nextcode) != (s16)curcode+7 ) - continue; - break; - - case 0x2c: // SWL - if( (nextcode>>26) != 0x2d || ((s16)nextcode)+7 != (s16)curcode ) - continue; - break; - case 0x2d: // SWR - if( (nextcode>>26) != 0x2c || ((s16)nextcode) != (s16)curcode+7 ) - continue; - break; - } - - // good enough - g_pCurInstInfo[0].info |= EEINSTINFO_COREC; - g_pCurInstInfo[0].numpeeps = 1; - 
g_pCurInstInfo[1].info |= EEINSTINFO_NOREC; - g_pCurInstInfo++; - i += 4; - continue; - } - } - } -#endif // end peephole - } - - if( usecop2 ) { - // add necessary mac writebacks - g_pCurInstInfo = s_pInstCache; - - for(i = startpc; i < s_nEndBlock-4; i += 4) { - g_pCurInstInfo++; - - if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { - } - } - } - } - - // perf counters // -#ifdef PCSX2_DEVBUILD - s_startcount = 0; -// if( pc+32 < s_nEndBlock ) { -// // only blocks with more than 8 insts -// //PUSH32I((u32)&lbase); -// //CALLFunc((u32)QueryPerformanceCounter); -// lbase.QuadPart = GetCPUTick(); -// s_startcount = 1; -// } -#endif - -#ifdef _DEBUG - // dump code - for(i = 0; i < ARRAYSIZE(s_recblocks); ++i) { - if( startpc == s_recblocks[i] ) { - iDumpBlock(startpc, recPtr); - } - } - - if( (dumplog & 1) ) //|| usecop2 ) - iDumpBlock(startpc, recPtr); -#endif - - // finally recompile // - g_pCurInstInfo = s_pInstCache; - while (!branch && pc < s_nEndBlock) { - recompileNextInstruction(0); - } - -#ifdef _DEBUG - if( (dumplog & 1) ) - iDumpBlock(startpc, recPtr); -#endif - - assert( (pc-startpc)>>2 <= 0xffff ); - s_pCurBlockEx->size = (pc-startpc)>>2; - - for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { - s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; - s_pCurBlock[i].startpc = s_pCurBlock->startpc; - } - - // don't overwrite if delay slot - if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { - s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; - s_pCurBlock[i].startpc = s_pCurBlock->startpc; - } - - // set the block ptr - AddBaseBlockEx(s_pCurBlockEx, 0); -// if( p[1].startpc == p[0].startpc + 4 ) { -// assert( p[1].pFnptr != 0 ); -// // already fn in place, so add to list -// AddBaseBlockEx(s_pCurBlockEx, 0); -// } -// else -// *(BASEBLOCKEX**)(p+1) = pex; -// } - - //PC_SETBLOCKEX(s_pCurBlock, s_pCurBlockEx); - - if( !(pc&0x10000000) ) - maxrecmem = max( (pc&~0xa0000000), maxrecmem ); - - if( branch == 2 ) { - iFlushCall(FLUSH_EVERYTHING); 
- - iBranchTest(0xffffffff, 1); - if( bExecBIOS ) CheckForBIOSEnd(); - - JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); - } - else { - assert( branch != 3 ); - if( branch ) assert( !willbranch3 ); - else ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); - - if( willbranch3 ) { - BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock); - assert( pc == s_nEndBlock ); - iFlushCall(FLUSH_EVERYTHING); - MOV32ItoM((u32)&cpuRegs.pc, pc); - JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5)); - branch = 3; - } - else if( !branch ) { - // didn't branch, but had to stop - MOV32ItoM( (u32)&cpuRegs.pc, pc ); - - iFlushCall(FLUSH_EVERYTHING); - - ptr = JMP32(0); - } - } - - assert( x86Ptr >= (s8*)s_pCurBlock->pFnptr + EE_MIN_BLOCK_BYTES ); - assert( x86Ptr < recMem+REC_CACHEMEM ); - assert( recStackPtr < recStack+RECSTACK_SIZE ); - assert( x86FpuState == 0 ); - - recPtr = x86Ptr; - - assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); - - if( !branch ) { - BASEBLOCK* pcurblock = s_pCurBlock; - u32 nEndBlock = s_nEndBlock; - s_pCurBlock = PC_GETBLOCK(pc); - assert( ptr != NULL ); - - if( s_pCurBlock->startpc != pc ) - recRecompile(pc); - - if( pcurblock->startpc == startpc ) { - assert( pcurblock->pFnptr ); - assert( s_pCurBlock->startpc == nEndBlock ); - *ptr = s_pCurBlock->pFnptr - ( (u32)ptr + 4 ); - } - else { - recRecompile(startpc); - assert( pcurblock->pFnptr != 0 ); - } - } -} - -R5900cpu recCpu = { - recInit, - recReset, - recStep, - recExecute, - recExecuteBlock, - recExecuteVU0Block, - recExecuteVU1Block, - recEnableVU0micro, - recEnableVU1micro, - recClear, - recClearVU0, - recClearVU1, - recShutdown -}; - -#endif // PCSX2_NORECBUILD +extern int rdram_sdevid; + +void iDumpRegisters(u32 startpc, u32 temp) +{ + int i; + char* pstr = temp ? 
"t" : ""; + const u32 dmacs[] = {0x8000, 0x9000, 0xa000, 0xb000, 0xb400, 0xc000, 0xc400, 0xc800, 0xd000, 0xd400 }; + extern char *disRNameGPR[]; + char* psymb; + + psymb = disR5900GetSym(startpc); + + if( psymb != NULL ) + __Log("%sreg(%s): %x %x c:%x\n", pstr, psymb, startpc, cpuRegs.interrupt, cpuRegs.cycle); + else + __Log("%sreg: %x %x c:%x\n", pstr, startpc, cpuRegs.interrupt, cpuRegs.cycle); + for(i = 1; i < 32; ++i) __Log("%s: %x_%x_%x_%x\n", disRNameGPR[i], cpuRegs.GPR.r[i].UL[3], cpuRegs.GPR.r[i].UL[2], cpuRegs.GPR.r[i].UL[1], cpuRegs.GPR.r[i].UL[0]); + //for(i = 0; i < 32; i+=4) __Log("cp%d: %x_%x_%x_%x\n", i, cpuRegs.CP0.r[i], cpuRegs.CP0.r[i+1], cpuRegs.CP0.r[i+2], cpuRegs.CP0.r[i+3]); + //for(i = 0; i < 32; ++i) __Log("%sf%d: %f %x\n", pstr, i, fpuRegs.fpr[i].f, fpuRegs.fprc[i]); + //for(i = 1; i < 32; ++i) __Log("%svf%d: %f %f %f %f, vi: %x\n", pstr, i, VU0.VF[i].F[3], VU0.VF[i].F[2], VU0.VF[i].F[1], VU0.VF[i].F[0], VU0.VI[i].UL); + for(i = 0; i < 32; ++i) __Log("%sf%d: %x %x\n", pstr, i, fpuRegs.fpr[i].UL, fpuRegs.fprc[i]); + for(i = 1; i < 32; ++i) __Log("%svf%d: %x %x %x %x, vi: %x\n", pstr, i, VU0.VF[i].UL[3], VU0.VF[i].UL[2], VU0.VF[i].UL[1], VU0.VF[i].UL[0], VU0.VI[i].UL); + __Log("%svfACC: %x %x %x %x\n", pstr, VU0.ACC.UL[3], VU0.ACC.UL[2], VU0.ACC.UL[1], VU0.ACC.UL[0]); + __Log("%sLO: %x_%x_%x_%x, HI: %x_%x_%x_%x\n", pstr, cpuRegs.LO.UL[3], cpuRegs.LO.UL[2], cpuRegs.LO.UL[1], cpuRegs.LO.UL[0], + cpuRegs.HI.UL[3], cpuRegs.HI.UL[2], cpuRegs.HI.UL[1], cpuRegs.HI.UL[0]); + __Log("%sCycle: %x %x, Count: %x\n", pstr, cpuRegs.cycle, g_nextBranchCycle, cpuRegs.CP0.n.Count); + iDumpPsxRegisters(psxRegs.pc, temp); + + __Log("f410,30,40: %x %x %x, %d %d\n", psHu32(0xf410), psHu32(0xf430), psHu32(0xf440), rdram_sdevid, rdram_devices); + __Log("cyc11: %x %x; vu0: %x, vu1: %x\n", cpuRegs.sCycle[1], cpuRegs.eCycle[1], VU0.cycle, VU1.cycle); + + __Log("%scounters: %x %x; psx: %x %x\n", pstr, nextsCounter, nextCounter, psxNextsCounter, psxNextCounter); + for(i 
= 0; i < 4; ++i) { + __Log("eetimer%d: count: %x mode: %x target: %x %x; %x %x; %x %x %x %x\n", i, + counters[i].count, counters[i].mode, counters[i].target, counters[i].hold, counters[i].rate, + counters[i].interrupt, counters[i].Cycle, counters[i].sCycle, counters[i].CycleT, counters[i].sCycleT); + } + __Log("VIF0_STAT = %x, VIF1_STAT = %x\n", psHu32(0x3800), psHu32(0x3C00)); + __Log("ipu %x %x %x %x; bp: %x %x %x %x\n", psHu32(0x2000), psHu32(0x2010), psHu32(0x2020), psHu32(0x2030), g_BP.BP, g_BP.bufferhasnew, g_BP.FP, g_BP.IFC); + __Log("gif: %x %x %x\n", psHu32(0x3000), psHu32(0x3010), psHu32(0x3020)); + for(i = 0; i < ARRAYSIZE(dmacs); ++i) { + DMACh* p = (DMACh*)(PS2MEM_HW+dmacs[i]); + __Log("dma%d c%x m%x q%x t%x s%x\n", i, p->chcr, p->madr, p->qwc, p->tadr, p->sadr); + } + __Log("dmac %x %x %x %x\n", psHu32(DMAC_CTRL), psHu32(DMAC_STAT), psHu32(DMAC_RBSR), psHu32(DMAC_RBOR)); + __Log("intc %x %x\n", psHu32(INTC_STAT), psHu32(INTC_MASK)); + __Log("sif: %x %x %x %x %x\n", psHu32(0xf200), psHu32(0xf220), psHu32(0xf230), psHu32(0xf240), psHu32(0xf260)); +} + +extern u32 psxdump; + +static void printfn() +{ + static int lastrec = 0; + static int curcount = 0, count2 = 0; + const int skip = 0; + static int i; + + assert( !g_globalMMXSaved ); + assert( !g_globalXMMSaved ); + + if( (dumplog&2) && g_lastpc != 0x81fc0 ) {//&& lastrec != g_lastpc ) { + curcount++; + + if( curcount > skip ) { + iDumpRegisters(g_lastpc, 1); + curcount = 0; + } + + lastrec = g_lastpc; + } +} + +u32 s_recblocks[] = {0}; + +void badespfn() { + assert(0); + SysPrintf("Bad esp!\n"); +} + +#define OPTIMIZE_COP2 0//CHECK_VU0REC + +void recRecompile( u32 startpc ) +{ + u32 i = 0; + u32 branchTo; + u32 willbranch3 = 0; + u32* ptr; + u32 usecop2; + +#ifdef _DEBUG + //dumplog |= 4; + if( dumplog & 4 ) + iDumpRegisters(startpc, 0); +#endif + + assert( startpc ); + + // if recPtr reached the mem limit reset whole mem + if ( ( (uptr)recPtr - (uptr)recMem ) >= REC_CACHEMEM-0x40000 || dumplog == 
0xffffffff) { + recReset(); + } + if ( ( (uptr)recStackPtr - (uptr)recStack ) >= RECSTACK_SIZE-0x100 ) { +#ifdef _DEBUG + SysPrintf("stack reset\n"); +#endif + recReset(); + } + + s_pCurBlock = PC_GETBLOCK(startpc); + + if( s_pCurBlock->pFnptr ) { + // clear if already taken + assert( s_pCurBlock->startpc < startpc ); + recClearMem(s_pCurBlock); + } + + if( s_pCurBlock->startpc == startpc ) { + s_pCurBlockEx = PC_GETBLOCKEX(s_pCurBlock); + assert( s_pCurBlockEx->startpc == startpc ); + } + else { + s_pCurBlockEx = NULL; + for(i = 0; i < EE_NUMBLOCKS; ++i) { + if( recBlocks[(i+s_nNextBlock)%EE_NUMBLOCKS].size == 0 ) { + s_pCurBlockEx = recBlocks+(i+s_nNextBlock)%EE_NUMBLOCKS; + s_nNextBlock = (i+s_nNextBlock+1)%EE_NUMBLOCKS; + break; + } + } + + if( s_pCurBlockEx == NULL ) { + //SysPrintf("ee reset (blocks)\n"); + recReset(); + s_nNextBlock = 0; + s_pCurBlockEx = recBlocks; + } + + s_pCurBlockEx->startpc = startpc; + } + + x86SetPtr( recPtr ); + x86Align(16); + recPtr = x86Ptr; + s_pCurBlock->pFnptr = (u32)x86Ptr; + s_pCurBlock->startpc = startpc; + + // slower +// if( startpc == 0x81fc0 ) { +// +// MOV32MtoR(ECX, (u32)&g_nextBranchCycle); +// MOV32RtoM((u32)&cpuRegs.cycle, ECX); +// //ADD32ItoR(ECX, 9); +// //ADD32ItoM((u32)&cpuRegs.cycle, 512); +// CALLFunc((u32)cpuBranchTest); +// CMP32ItoM((u32)&cpuRegs.pc, 0x81fc0); +// JE8(s_pCurBlock->pFnptr - (u32)(x86Ptr+2) ); +// JMP32((u32)DispatcherReg - (u32)(x86Ptr+5)); +// +// pc = startpc + 9*4; +// assert( (pc-startpc)>>2 <= 0xffff ); +// s_pCurBlockEx->size = (pc-startpc)>>2; +// +// for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { +// s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; +// s_pCurBlock[i].startpc = s_pCurBlock->startpc; +// } +// +// // don't overwrite if delay slot +// if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { +// s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; +// s_pCurBlock[i].startpc = s_pCurBlock->startpc; +// } +// +// // set the block ptr +// 
AddBaseBlockEx(s_pCurBlockEx, 0); +// +// if( !(pc&0x10000000) ) +// maxrecmem = max( (pc&~0xa0000000), maxrecmem ); +// +// recPtr = x86Ptr; +// return; +// } + + branch = 0; + + // reset recomp state variables + s_nBlockCycles = 0; + pc = startpc; + x86FpuState = FPU_STATE; + iCWstate = 0; + s_saveConstGPRreg = 0; + g_cpuHasConstReg = g_cpuFlushedConstReg = 1; + g_cpuPrevRegHasLive1 = g_cpuRegHasLive1 = 0xffffffff; + g_cpuPrevRegHasSignExt = g_cpuRegHasSignExt = 0; + _recClearWritebacks(); + assert( g_cpuConstRegs[0].UD[0] == 0 ); + + _initX86regs(); + _initXMMregs(); + _initMMXregs(); + +#ifdef _DEBUG + // for debugging purposes + MOV32ItoM((u32)&g_lastpc, pc); + CALLFunc((u32)printfn); + +// CMP32MtoR(EBP, (u32)&s_uSaveEBP); +// j8Ptr[0] = JE8(0); +// CALLFunc((u32)badespfn); +// x86SetJ8(j8Ptr[0]); +#endif + + // go until the next branch + i = startpc; + s_nEndBlock = 0xffffffff; + s_nHasDelay = 0; + + while(1) { + BASEBLOCK* pblock = PC_GETBLOCK(i); + if( pblock->pFnptr != 0 && pblock->startpc != s_pCurBlock->startpc ) { + + if( i == pblock->startpc ) { + // branch = 3 + willbranch3 = 1; + s_nEndBlock = i; + break; + } + } + + cpuRegs.code = *(int *)PSM(i); + + switch(cpuRegs.code >> 26) { + case 0: // special + + if( _Funct_ == 8 || _Funct_ == 9 ) { // JR, JALR + s_nEndBlock = i + 8; + s_nHasDelay = 1; + goto StartRecomp; + } + + break; + case 1: // regimm + + if( _Rt_ < 4 || (_Rt_ >= 16 && _Rt_ < 20) ) { + // branches + if( _Rt_ == 2 && _Rt_ == 3 && _Rt_ == 18 && _Rt_ == 19 ) s_nHasDelay = 1; + else s_nHasDelay = 2; + + branchTo = _Imm_ * 4 + i + 4; + if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; + else s_nEndBlock = i+8; + + goto StartRecomp; + } + + break; + + case 2: // J + case 3: // JAL + s_nHasDelay = 1; + s_nEndBlock = i + 8; + goto StartRecomp; + + // branches + case 4: case 5: case 6: case 7: + case 20: case 21: case 22: case 23: + + if( (cpuRegs.code >> 26) >= 20 ) s_nHasDelay = 1; + else s_nHasDelay = 2; + + branchTo = _Imm_ * 
4 + i + 4; + if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; + else s_nEndBlock = i+8; + + goto StartRecomp; + + case 16: // cp0 + if( _Rs_ == 16 ) { + if( _Funct_ == 24 ) { // eret + s_nEndBlock = i+4; + goto StartRecomp; + } + } + + break; + case 17: // cp1 + case 18: // cp2 + if( _Rs_ == 8 ) { + // BC1F, BC1T, BC1FL, BC1TL + // BC2F, BC2T, BC2FL, BC2TL + if( _Rt_ >= 2 ) s_nHasDelay = 1; + else s_nHasDelay = 2; + + branchTo = _Imm_ * 4 + i + 4; + if( branchTo > startpc && branchTo < i ) s_nEndBlock = branchTo; + else s_nEndBlock = i+8; + + goto StartRecomp; + } + break; + } + + i += 4; + } + +StartRecomp: + + // rec info // + { + EEINST* pcur; + + if( s_nInstCacheSize < (s_nEndBlock-startpc)/4+1 ) { + free(s_pInstCache); + s_nInstCacheSize = (s_nEndBlock-startpc)/4+10; + s_pInstCache = (EEINST*)malloc(sizeof(EEINST)*s_nInstCacheSize); + assert( s_pInstCache != NULL ); + } + + pcur = s_pInstCache + (s_nEndBlock-startpc)/4; + _recClearInst(pcur); + pcur->info = 0; + + for(i = s_nEndBlock; i > startpc; i -= 4 ) { + cpuRegs.code = *(int *)PSM(i-4); + pcur[-1] = pcur[0]; + rpropBSC(pcur-1, pcur); + pcur--; + } + } + + // analyze instructions // + { + usecop2 = 0; + g_pCurInstInfo = s_pInstCache; + + for(i = startpc; i < s_nEndBlock; i += 4) { + g_pCurInstInfo++; + cpuRegs.code = *(u32*)PSM(i); + + // cop2 // + if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { + + if( !usecop2 ) { + // init + if( OPTIMIZE_COP2 ) { + memset(VU0.fmac,0,sizeof(VU0.fmac)); + memset(&VU0.fdiv,0,sizeof(VU0.fdiv)); + memset(&VU0.efu,0,sizeof(VU0.efu)); + } + vucycle = 0; + usecop2 = 1; + } + + VU0.code = cpuRegs.code; + _cop2AnalyzeOp(g_pCurInstInfo, OPTIMIZE_COP2); + continue; + } + + if( usecop2 ) vucycle++; + + // peephole optimizations // +#ifdef PCSX2_VIRTUAL_MEM + if( i < s_nEndBlock-4 && recompileCodeSafe(i) ) { + u32 curcode = cpuRegs.code; + u32 nextcode = *(u32*)PSM(i+4); + if( _eeIsLoadStoreCoIssue(curcode, nextcode) && recBSC_co[curcode>>26] != NULL ) { + + // rs has 
to be the same, and cannot be just written + if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_eeLoadWritesRs(curcode) ) { + + if( _eeIsLoadStoreCoX(curcode) && ((nextcode>>16)&0x1f) != ((curcode>>21)&0x1f) ) { + // see how many stores there are + u32 j; + // use xmmregs since only supporting lwc1,lq,swc1,sq + for(j = i+8; j < s_nEndBlock && j < i+4*XMMREGS; j += 4 ) { + u32 nncode = *(u32*)PSM(j); + if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) || + _eeLoadWritesRs(nncode)) + break; + } + + if( j > i+8 ) { + u32 num = (j-i)>>2; // number of stores that can coissue + assert( num <= XMMREGS ); + + g_pCurInstInfo[0].numpeeps = num-1; + g_pCurInstInfo[0].info |= EEINSTINFO_COREC; + + while(i < j-4) { + g_pCurInstInfo++; + g_pCurInstInfo[0].info |= EEINSTINFO_NOREC; + i += 4; + } + + continue; + } + + // fall through + } + + // unaligned loadstores + + // if LWL, check if LWR and that offsets are +3 away + switch(curcode >> 26) { + case 0x22: // LWL + if( (nextcode>>26) != 0x26 || ((s16)nextcode)+3 != (s16)curcode ) + continue; + break; + case 0x26: // LWR + if( (nextcode>>26) != 0x22 || ((s16)nextcode) != (s16)curcode+3 ) + continue; + break; + + case 0x2a: // SWL + if( (nextcode>>26) != 0x2e || ((s16)nextcode)+3 != (s16)curcode ) + continue; + break; + case 0x2e: // SWR + if( (nextcode>>26) != 0x2a || ((s16)nextcode) != (s16)curcode+3 ) + continue; + break; + + case 0x1a: // LDL + if( (nextcode>>26) != 0x1b || ((s16)nextcode)+7 != (s16)curcode ) + continue; + break; + case 0x1b: // LWR + if( (nextcode>>26) != 0x1aa || ((s16)nextcode) != (s16)curcode+7 ) + continue; + break; + + case 0x2c: // SWL + if( (nextcode>>26) != 0x2d || ((s16)nextcode)+7 != (s16)curcode ) + continue; + break; + case 0x2d: // SWR + if( (nextcode>>26) != 0x2c || ((s16)nextcode) != (s16)curcode+7 ) + continue; + break; + } + + // good enough + g_pCurInstInfo[0].info |= EEINSTINFO_COREC; + g_pCurInstInfo[0].numpeeps = 1; + g_pCurInstInfo[1].info |= 
EEINSTINFO_NOREC; + g_pCurInstInfo++; + i += 4; + continue; + } + } + } +#endif // end peephole + } + + if( usecop2 ) { + // add necessary mac writebacks + g_pCurInstInfo = s_pInstCache; + + for(i = startpc; i < s_nEndBlock-4; i += 4) { + g_pCurInstInfo++; + + if( g_pCurInstInfo->info & EEINSTINFO_COP2 ) { + } + } + } + } + + // perf counters // +#ifdef PCSX2_DEVBUILD + s_startcount = 0; +// if( pc+32 < s_nEndBlock ) { +// // only blocks with more than 8 insts +// //PUSH32I((u32)&lbase); +// //CALLFunc((u32)QueryPerformanceCounter); +// lbase.QuadPart = GetCPUTick(); +// s_startcount = 1; +// } +#endif + +#ifdef _DEBUG + // dump code + for(i = 0; i < ARRAYSIZE(s_recblocks); ++i) { + if( startpc == s_recblocks[i] ) { + iDumpBlock(startpc, recPtr); + } + } + + if( (dumplog & 1) ) //|| usecop2 ) + iDumpBlock(startpc, recPtr); +#endif + + // finally recompile // + g_pCurInstInfo = s_pInstCache; + while (!branch && pc < s_nEndBlock) { + recompileNextInstruction(0); + } + +#ifdef _DEBUG + if( (dumplog & 1) ) + iDumpBlock(startpc, recPtr); +#endif + + assert( (pc-startpc)>>2 <= 0xffff ); + s_pCurBlockEx->size = (pc-startpc)>>2; + + for(i = 1; i < (u32)s_pCurBlockEx->size-1; ++i) { + s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; + s_pCurBlock[i].startpc = s_pCurBlock->startpc; + } + + // don't overwrite if delay slot + if( i < (u32)s_pCurBlockEx->size && !(s_pCurBlock[i].uType & BLOCKTYPE_DELAYSLOT) ) { + s_pCurBlock[i].pFnptr = s_pCurBlock->pFnptr; + s_pCurBlock[i].startpc = s_pCurBlock->startpc; + } + + // set the block ptr + AddBaseBlockEx(s_pCurBlockEx, 0); +// if( p[1].startpc == p[0].startpc + 4 ) { +// assert( p[1].pFnptr != 0 ); +// // already fn in place, so add to list +// AddBaseBlockEx(s_pCurBlockEx, 0); +// } +// else +// *(BASEBLOCKEX**)(p+1) = pex; +// } + + //PC_SETBLOCKEX(s_pCurBlock, s_pCurBlockEx); + + if( !(pc&0x10000000) ) + maxrecmem = max( (pc&~0xa0000000), maxrecmem ); + + if( branch == 2 ) { + iFlushCall(FLUSH_EVERYTHING); + + 
iBranchTest(0xffffffff, 1); + if( bExecBIOS ) CheckForBIOSEnd(); + + JMP32((u32)DispatcherReg - ( (u32)x86Ptr + 5 )); + } + else { + assert( branch != 3 ); + if( branch ) assert( !willbranch3 ); + else ADD32ItoM((int)&cpuRegs.cycle, s_nBlockCycles*EECYCLE_MULT); + + if( willbranch3 ) { + BASEBLOCK* pblock = PC_GETBLOCK(s_nEndBlock); + assert( pc == s_nEndBlock ); + iFlushCall(FLUSH_EVERYTHING); + MOV32ItoM((u32)&cpuRegs.pc, pc); + JMP32((u32)pblock->pFnptr - ((u32)x86Ptr + 5)); + branch = 3; + } + else if( !branch ) { + // didn't branch, but had to stop + MOV32ItoM( (u32)&cpuRegs.pc, pc ); + + iFlushCall(FLUSH_EVERYTHING); + + ptr = JMP32(0); + } + } + + assert( x86Ptr >= (s8*)s_pCurBlock->pFnptr + EE_MIN_BLOCK_BYTES ); + assert( x86Ptr < recMem+REC_CACHEMEM ); + assert( recStackPtr < recStack+RECSTACK_SIZE ); + assert( x86FpuState == 0 ); + + recPtr = x86Ptr; + + assert( (g_cpuHasConstReg&g_cpuFlushedConstReg) == g_cpuHasConstReg ); + + if( !branch ) { + BASEBLOCK* pcurblock = s_pCurBlock; + u32 nEndBlock = s_nEndBlock; + s_pCurBlock = PC_GETBLOCK(pc); + assert( ptr != NULL ); + + if( s_pCurBlock->startpc != pc ) + recRecompile(pc); + + if( pcurblock->startpc == startpc ) { + assert( pcurblock->pFnptr ); + assert( s_pCurBlock->startpc == nEndBlock ); + *ptr = s_pCurBlock->pFnptr - ( (u32)ptr + 4 ); + } + else { + recRecompile(startpc); + assert( pcurblock->pFnptr != 0 ); + } + } +} + +R5900cpu recCpu = { + recInit, + recReset, + recStep, + recExecute, + recExecuteBlock, + recExecuteVU0Block, + recExecuteVU1Block, + recEnableVU0micro, + recEnableVU1micro, + recClear, + recClearVU0, + recClearVU1, + recShutdown +}; + +#endif // PCSX2_NORECBUILD diff --git a/pcsx2/x86/ix86/ix86.h b/pcsx2/x86/ix86/ix86.h index b52c858..de8ecfd 100644 --- a/pcsx2/x86/ix86/ix86.h +++ b/pcsx2/x86/ix86/ix86.h @@ -1,1768 +1,1792 @@ -/* - * ix86 definitions v0.6.2 - * Authors: linuzappz - * alexey silinov - * goldfinger - * shadow < shadow@pcsx2.net > - */ - -#ifndef __IX86_H__ -#define 
__IX86_H__ - -#ifdef __cplusplus -extern "C" { -#endif - -#include "PS2Etypes.h" // Basic types header - -#ifdef __x86_64__ -#define XMMREGS 16 -#define X86REGS 16 -#else -#define XMMREGS 8 -#define X86REGS 8 -#endif - -#define MMXREGS 8 - -#define SIB 4 -#define DISP32 5 - -// general types -typedef int x86IntRegType; -#define EAX 0 -#define EBX 3 -#define ECX 1 -#define EDX 2 -#define ESI 6 -#define EDI 7 -#define EBP 5 -#define ESP 4 - -#ifdef __x86_64__ -#define RAX 0 -#define RBX 3 -#define RCX 1 -#define RDX 2 -#define RSI 6 -#define RDI 7 -#define RBP 5 -#define RSP 4 -#define R8 8 -#define R9 9 -#define R10 10 -#define R11 11 -#define R12 12 -#define R13 13 -#define R14 14 -#define R15 15 - -#define X86_TEMP RAX // don't allocate anything - -#ifdef _MSC_VER -extern x86IntRegType g_x86savedregs[8]; -extern x86IntRegType g_x86tempregs[6]; -#else -extern x86IntRegType g_x86savedregs[6]; -extern x86IntRegType g_x86tempregs[8]; -#endif - -extern x86IntRegType g_x86allregs[14]; // all registers that can be used by the recs -extern x86IntRegType g_x868bitregs[11]; -extern x86IntRegType g_x86non8bitregs[3]; - -#ifdef _MSC_VER -#define X86ARG1 RCX -#define X86ARG2 RDX -#define X86ARG3 R8 -#define X86ARG4 R9 -#else -#define X86ARG1 RDI -#define X86ARG2 RSI -#define X86ARG3 RDX -#define X86ARG4 RCX -#endif - -#else - -#define X86ARG1 EAX -#define X86ARG2 ECX -#define X86ARG3 EDX -#define X86ARG4 EBX - -#endif // __x86_64__ - -#define MM0 0 -#define MM1 1 -#define MM2 2 -#define MM3 3 -#define MM4 4 -#define MM5 5 -#define MM6 6 -#define MM7 7 - -typedef int x86MMXRegType; - -#define XMM0 0 -#define XMM1 1 -#define XMM2 2 -#define XMM3 3 -#define XMM4 4 -#define XMM5 5 -#define XMM6 6 -#define XMM7 7 -#define XMM8 8 -#define XMM9 9 -#define XMM10 10 -#define XMM11 11 -#define XMM12 12 -#define XMM13 13 -#define XMM14 14 -#define XMM15 15 - -typedef int x86SSERegType; - -typedef enum -{ - XMMT_INT = 0, // integer (sse2 only) - XMMT_FPS = 1, // floating point - 
//XMMT_FPD = 3, // double -} XMMSSEType; - -extern XMMSSEType g_xmmtypes[XMMREGS]; - -void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs - -//cpu capabilities structure -typedef struct { - u32 hasFloatingPointUnit; - u32 hasVirtual8086ModeEnhancements; - u32 hasDebuggingExtensions; - u32 hasPageSizeExtensions; - u32 hasTimeStampCounter; - u32 hasModelSpecificRegisters; - u32 hasPhysicalAddressExtension; - u32 hasCOMPXCHG8BInstruction; - u32 hasAdvancedProgrammableInterruptController; - u32 hasSEPFastSystemCall; - u32 hasMemoryTypeRangeRegisters; - u32 hasPTEGlobalFlag; - u32 hasMachineCheckArchitecture; - u32 hasConditionalMoveAndCompareInstructions; - u32 hasFGPageAttributeTable; - u32 has36bitPageSizeExtension; - u32 hasProcessorSerialNumber; - u32 hasCFLUSHInstruction; - u32 hasDebugStore; - u32 hasACPIThermalMonitorAndClockControl; - u32 hasMultimediaExtensions; - u32 hasFastStreamingSIMDExtensionsSaveRestore; - u32 hasStreamingSIMDExtensions; - u32 hasStreamingSIMD2Extensions; - u32 hasSelfSnoop; - u32 hasHyperThreading; - u32 hasThermalMonitor; - u32 hasIntel64BitArchitecture; - u32 hasStreamingSIMD3Extensions; - //that is only for AMDs - u32 hasMultimediaExtensionsExt; - u32 hasAMD64BitArchitecture; - u32 has3DNOWInstructionExtensionsExt; - u32 has3DNOWInstructionExtensions; -} CAPABILITIES; - -extern CAPABILITIES cpucaps; - -typedef struct { - - u32 x86Family; // Processor Family - u32 x86Model; // Processor Model - u32 x86PType; // Processor Type - u32 x86StepID; // Stepping ID - u32 x86Flags; // Feature Flags - u32 x86EFlags; // Extended Feature Flags - //all the above returns hex values - s8 x86ID[16]; // Vendor ID //the vendor creator (in %s) - s8 x86Type[20]; //cpu type in char format //the cpu type (in %s) - s8 x86Fam[50]; // family in char format //the original cpu name string (in %s) - u32 cpuspeed; // speed of cpu //this will give cpu speed (in %d) -} CPUINFO; - -extern CPUINFO cpuinfo; - -extern s8 
*x86Ptr; -extern u8 *j8Ptr[32]; -extern u32 *j32Ptr[32]; - - -#ifdef __x86_64__ -#define X86_64ASSERT() assert(0) -#define MEMADDR(addr, oplen) ((addr) - ((u64)x86Ptr + ((u64)oplen))) -#else -#define X86_64ASSERT() -#define MEMADDR(addr, oplen) (addr) -#endif - -#ifdef __x86_64__ -#define Rex( w, r, x, b ) write8( 0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b) ); -#define RexR(w, reg) if( w||(reg)>=8 ) { Rex(w, (reg)>=8, 0, 0); } -#define RexB(w, base) if( w||(base)>=8 ) { Rex(w, 0, 0, (base)>=8); } -#define RexRB(w, reg, base) if( w || (reg) >= 8 || (base)>=8 ) { Rex(w, (reg)>=8, 0, (base)>=8); } -#define RexRXB(w, reg, index, base) if( w||(reg) >= 8 || (index) >= 8 || (base) >= 8 ) { \ - Rex(w, (reg)>=8, (index)>=8, (base)>=8); \ - } -#else -#define Rex(w,r,x,b) assert(0); -#define RexR(w, reg) if( w||(reg)>=8 ) assert(0); -#define RexB(w, base) if( w||(base)>=8 ) assert(0); -#define RexRB(w, reg, base) if( w||(reg) >= 8 || (base)>=8 ) assert(0); -#define RexRXB(w, reg, index, base) if( w||(reg) >= 8 || (index) >= 8 || (base) >= 8 ) assert(0); -#endif - -void write8( int val ); -void write16( int val ); -void write32( u32 val ); -void write64( u64 val ); - - -void x86SetPtr( char *ptr ); -void x86Shutdown( void ); - -void x86SetJ8( u8 *j8 ); -void x86SetJ8A( u8 *j8 ); -void x86SetJ16( u16 *j16 ); -void x86SetJ16A( u16 *j16 ); -void x86SetJ32( u32 *j32 ); -void x86SetJ32A( u32 *j32 ); - -void x86Align( int bytes ); -u64 GetCPUTick( void ); - -// General Helper functions -void ModRM( int mod, int rm, int reg ); -void SibSB( int ss, int rm, int index ); -void SET8R( int cc, int to ); -u8* J8Rel( int cc, int to ); -u32* J32Rel( int cc, u32 to ); -void CMOV32RtoR( int cc, int to, int from ); -void CMOV32MtoR( int cc, int to, uptr from ); - -//****************** -// IX86 intructions -//****************** - -// -// * scale values: -// * 0 - *1 -// * 1 - *2 -// * 2 - *4 -// * 3 - *8 -// - -void STC( void ); -void CLC( void ); - -//////////////////////////////////// 
-// mov instructions // -//////////////////////////////////// - -// mov r64 to r64 -void MOV64RtoR( x86IntRegType to, x86IntRegType from ); -// mov r64 to m64 -void MOV64RtoM( uptr to, x86IntRegType from ); -// mov m64 to r64 -void MOV64MtoR( x86IntRegType to, uptr from ); -// mov sign ext imm32 to m64 -void MOV64I32toM( uptr to, u32 from ); -// mov sign ext imm32 to r64 -void MOV64I32toR( x86IntRegType to, s32 from); -// mov imm64 to r64 -void MOV64ItoR( x86IntRegType to, u64 from); -// mov imm64 to [r64+off] -void MOV64ItoRmOffset( x86IntRegType to, u32 from, int offset); -// mov [r64+offset] to r64 -void MOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ); -// mov [r64][r64*scale] to r64 -void MOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); -// mov r64 to [r64+offset] -void MOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ); -// mov r64 to [r64][r64*scale] -void MOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); - -// mov r32 to r32 -void MOV32RtoR( x86IntRegType to, x86IntRegType from ); -// mov r32 to m32 -void MOV32RtoM( uptr to, x86IntRegType from ); -// mov m32 to r32 -void MOV32MtoR( x86IntRegType to, uptr from ); -// mov [r32] to r32 -void MOV32RmtoR( x86IntRegType to, x86IntRegType from ); -void MOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ); -// mov [r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack -void FSUBP( void ); -// fmul ST(src) to fpu reg stack ST(0) -void FMUL32Rto0( x86IntRegType src ); -// fmul ST(0) to fpu reg stack ST(src) -void FMUL320toR( x86IntRegType src ); -// fdiv ST(src) to fpu reg stack ST(0) -void FDIV32Rto0( x86IntRegType src ); -// fdiv ST(0) to fpu reg stack ST(src) -void FDIV320toR( x86IntRegType src ); -// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) -void FDIV320toRP( x86IntRegType src ); - -// fadd m32 to fpu reg stack -void FADD32( u32 from ); -// fsub m32 to fpu 
reg stack -void FSUB32( u32 from ); -// fmul m32 to fpu reg stack -void FMUL32( u32 from ); -// fdiv m32 to fpu reg stack -void FDIV32( u32 from ); -// fcomi st, st( i) -void FCOMI( x86IntRegType src ); -// fcomip st, st( i) -void FCOMIP( x86IntRegType src ); -// fucomi st, st( i) -void FUCOMI( x86IntRegType src ); -// fucomip st, st( i) -void FUCOMIP( x86IntRegType src ); -// fcom m32 to fpu reg stack -void FCOM32( u32 from ); -// fabs fpu reg stack -void FABS( void ); -// fsqrt fpu reg stack -void FSQRT( void ); -// ftan fpu reg stack -void FPATAN( void ); -// fsin fpu reg stack -void FSIN( void ); -// fchs fpu reg stack -void FCHS( void ); - -// fcmovb fpu reg to fpu reg stack -void FCMOVB32( x86IntRegType from ); -// fcmove fpu reg to fpu reg stack -void FCMOVE32( x86IntRegType from ); -// fcmovbe fpu reg to fpu reg stack -void FCMOVBE32( x86IntRegType from ); -// fcmovu fpu reg to fpu reg stack -void FCMOVU32( x86IntRegType from ); -// fcmovnb fpu reg to fpu reg stack -void FCMOVNB32( x86IntRegType from ); -// fcmovne fpu reg to fpu reg stack -void FCMOVNE32( x86IntRegType from ); -// fcmovnbe fpu reg to fpu reg stack -void FCMOVNBE32( x86IntRegType from ); -// fcmovnu fpu reg to fpu reg stack -void FCMOVNU32( x86IntRegType from ); -void FCOMP32( u32 from ); -void FNSTSWtoAX( void ); - -// probably a little extreme here, but x86-64 should NOT use MMX -#ifdef __x86_64__ - -#define MMXONLY(code) - -#else - -#define MMXONLY(code) code - -//****************** -// MMX instructions -//****************** - -// r64 = mm - -// movq m64 to r64 -void MOVQMtoR( x86MMXRegType to, uptr from ); -// movq r64 to m64 -void MOVQRtoM( uptr to, x86MMXRegType from ); - -// pand r64 to r64 -void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); -void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); -// pand m64 to r64 ; -void PANDMtoR( x86MMXRegType to, uptr from ); -// pandn r64 to r64 -void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); -// pandn r64 to r64 -void PANDNMtoR( 
x86MMXRegType to, uptr from ); -// por r64 to r64 -void PORRtoR( x86MMXRegType to, x86MMXRegType from ); -// por m64 to r64 -void PORMtoR( x86MMXRegType to, uptr from ); -// pxor r64 to r64 -void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); -// pxor m64 to r64 -void PXORMtoR( x86MMXRegType to, uptr from ); - -// psllq r64 to r64 -void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); -// psllq m64 to r64 -void PSLLQMtoR( x86MMXRegType to, uptr from ); -// psllq imm8 to r64 -void PSLLQItoR( x86MMXRegType to, u8 from ); -// psrlq r64 to r64 -void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); -// psrlq m64 to r64 -void PSRLQMtoR( x86MMXRegType to, uptr from ); -// psrlq imm8 to r64 -void PSRLQItoR( x86MMXRegType to, u8 from ); - -// paddusb r64 to r64 -void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddusb m64 to r64 -void PADDUSBMtoR( x86MMXRegType to, uptr from ); -// paddusw r64 to r64 -void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddusw m64 to r64 -void PADDUSWMtoR( x86MMXRegType to, uptr from ); - -// paddb r64 to r64 -void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddb m64 to r64 -void PADDBMtoR( x86MMXRegType to, uptr from ); -// paddw r64 to r64 -void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddw m64 to r64 -void PADDWMtoR( x86MMXRegType to, uptr from ); -// paddd r64 to r64 -void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); -// paddd m64 to r64 -void PADDDMtoR( x86MMXRegType to, uptr from ); -void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); -void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); - -// paddq m64 to r64 (sse2 only?) -void PADDQMtoR( x86MMXRegType to, uptr from ); -// paddq r64 to r64 (sse2 only?) 
-void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); - -void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); -void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); - -void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); -void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); -void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); -void PSUBDMtoR( x86MMXRegType to, uptr from ); - -// psubq m64 to r64 (sse2 only?) -void PSUBQMtoR( x86MMXRegType to, uptr from ); -// psubq r64 to r64 (sse2 only?) -void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); - -// pmuludq m64 to r64 (sse2 only?) -void PMULUDQMtoR( x86MMXRegType to, uptr from ); -// pmuludq r64 to r64 (sse2 only?) -void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); - -void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); -void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); -void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); -void PCMPEQDMtoR( x86MMXRegType to, uptr from ); -void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); -void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); -void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); -void PCMPGTDMtoR( x86MMXRegType to, uptr from ); -void PSRLWItoR( x86MMXRegType to, u8 from ); -void PSRLDItoR( x86MMXRegType to, u8 from ); -void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); -void PSLLWItoR( x86MMXRegType to, u8 from ); -void PSLLDItoR( x86MMXRegType to, u8 from ); -void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); -void PSRAWItoR( x86MMXRegType to, u8 from ); -void PSRADItoR( x86MMXRegType to, u8 from ); -void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); -void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); -void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); -void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); -void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); -void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 -void 
MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); -void MOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ); -void MOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ); -void MOVDMtoMMX( x86MMXRegType to, uptr from ); -void MOVDMMXtoM( uptr to, x86MMXRegType from ); -void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); -void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ); -void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ); -void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); -void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ); -void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ); -void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); -void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); -void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); -void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); - -// emms -void EMMS( void ); - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits -//********************************************************************************** -void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); -void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); - -void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); - -void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); -void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); - -#endif // !__x86_64__ - -//********************* -// SSE instructions * -//********************* -void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); -void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); - -void SSE_MOVSS_M32_to_XMM( x86SSERegType to, 
uptr from ); -void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); -void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); -void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); -void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); - -void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); -void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); - -void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); -void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); -void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); -void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); - -void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); -void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); -void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); -void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); - -void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); -void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); -void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); -void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ); - -void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); -void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); -void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); 
-void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); -void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); -void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); -void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); -void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); - -void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); -void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ); - -void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); -void SSE2_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); - -void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); - -void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); -void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -#ifndef __x86_64__ -void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); -void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); -void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); -void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); -void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); -#endif -void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); -void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); -void 
SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); -void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); -void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); - -void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MAXSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); -void 
SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); -void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPNEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -// VectorPath -void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); -void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); - -void SSE_STMXCSR( uptr from ); -void SSE_LDMXCSR( uptr from ); - - -//********************* -// SSE 2 
Instructions* -//********************* -void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); -void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); - -void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); -void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); - -void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); -void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); -void 
SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PADDUSW_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ); -void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); -void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); - -//**********************************************************************************/ -//PACKSSWB,PACKSSDW: Pack Saturate Signed Word -//********************************************************************************** -void SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); - -//**********************************************************************************/ -//PUNPCKHWD: Unpack 16bit high -//********************************************************************************** -void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); -void 
SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); - -// mult by half words -void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); -void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); - - -//**********************************************************************************/ -//PMOVMSKB: Create 16bit mask from signs of 8bit integers -//********************************************************************************** -void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); - -void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); -void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); - -//**********************************************************************************/ -//PEXTRW,PINSRW: Packed Extract/Insert Word * -//********************************************************************************** -void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); -void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); - - -//**********************************************************************************/ -//PSUBx: Subtract Packed Integers * -//********************************************************************************** -void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); -void 
SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); -/////////////////////////////////////////////////////////////////////////////////////// -//**********************************************************************************/ -//PCMPxx: Compare Packed Integers * -//********************************************************************************** -void SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); -void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); -void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); -//**********************************************************************************/ -//MOVD: Move Dword(32bit) to /from XMM reg * -//********************************************************************************** -void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); -void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); -void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ); -void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); -void SSE2_MOVD_XMM_to_M32( u32 
to, x86SSERegType from ); -void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); -void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); -void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); - -#ifdef __x86_64__ -void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); -void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); -#endif - -//**********************************************************************************/ -//POR : SSE Bitwise OR * -//********************************************************************************** -void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); - -void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); - -void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); -void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); -//********************* -// SSE-X - uses both SSE,SSE2 code and tries to keep consistensies between the data -// Uses g_xmmtypes to infer the correct type. 
-//********************* -void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ); -void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ); -void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); -void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); - -void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ); -void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ); -void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); -void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); -void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); -void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); -void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); - -void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ); -void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); -void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ); -void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); -void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); -void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); -void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); - -void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); - -//********************* -// 3DNOW instructions * -//********************* -void FEMMS( void ); -void PFCMPEQMtoR( x86IntRegType to, uptr from ); -void PFCMPGTMtoR( x86IntRegType to, uptr from ); -void 
PFCMPGEMtoR( x86IntRegType to, uptr from ); -void PFADDMtoR( x86IntRegType to, uptr from ); -void PFADDRtoR( x86IntRegType to, x86IntRegType from ); -void PFSUBMtoR( x86IntRegType to, uptr from ); -void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); -void PFMULMtoR( x86IntRegType to, uptr from ); -void PFMULRtoR( x86IntRegType to, x86IntRegType from ); -void PFRCPMtoR( x86IntRegType to, uptr from ); -void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); -void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); -void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); -void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); -void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); -void PF2IDMtoR( x86IntRegType to, uptr from ); -void PF2IDRtoR( x86IntRegType to, x86IntRegType from ); -void PI2FDMtoR( x86IntRegType to, uptr from ); -void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); -void PFMAXMtoR( x86IntRegType to, uptr from ); -void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); -void PFMINMtoR( x86IntRegType to, uptr from ); -void PFMINRtoR( x86IntRegType to, x86IntRegType from ); - -void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from); -void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from); -void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from); -void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); -void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset ); - -#ifndef __x86_64__ -void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); -void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); -#endif - -/* SSE2 emulated functions for SSE CPU's by kekko*/ - -void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); -void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); -void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); -void SSE2EMU_CVTDQ2PS_M128_to_XMM( 
x86SSERegType to, uptr from ); -void SSE2EMU_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); -void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); - -//////////////////////////////////////////////////// -#ifdef _DEBUG -#define WRITECHECK() CheckX86Ptr() -#else -#define WRITECHECK() -#endif - -#define write8(val ) { \ - *(u8*)x86Ptr = (u8)val; \ - x86Ptr++; \ -} \ - -#define write16(val ) \ -{ \ - *(u16*)x86Ptr = (u16)val; \ - x86Ptr += 2; \ -} \ - -#define write32( val ) \ -{ \ - *(u32*)x86Ptr = val; \ - x86Ptr += 4; \ -} \ - -#ifdef __cplusplus -} -#endif - -#endif // __IX86_H__ +/* + * ix86 definitions v0.6.2 + * Authors: linuzappz + * alexey silinov + * goldfinger + * shadow < shadow@pcsx2.net > + */ + +#ifndef __IX86_H__ +#define __IX86_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "PS2Etypes.h" // Basic types header + +#ifdef __x86_64__ +#define XMMREGS 16 +#define X86REGS 16 +#else +#define XMMREGS 8 +#define X86REGS 8 +#endif + +#define MMXREGS 8 + +#define SIB 4 +#define DISP32 5 + +// general types +typedef int x86IntRegType; +#define EAX 0 +#define EBX 3 +#define ECX 1 +#define EDX 2 +#define ESI 6 +#define EDI 7 +#define EBP 5 +#define ESP 4 + +#ifdef __x86_64__ +#define RAX 0 +#define RBX 3 +#define RCX 1 +#define RDX 2 +#define RSI 6 +#define RDI 7 +#define RBP 5 +#define RSP 4 +#define R8 8 +#define R9 9 +#define R10 10 +#define R11 11 +#define R12 12 +#define R13 13 +#define R14 14 +#define R15 15 + +#define X86_TEMP RAX // don't allocate anything + +#ifdef _MSC_VER +extern x86IntRegType g_x86savedregs[8]; +extern x86IntRegType g_x86tempregs[6]; +#else +extern x86IntRegType g_x86savedregs[6]; +extern x86IntRegType g_x86tempregs[8]; +#endif + +extern x86IntRegType g_x86allregs[14]; // all registers that can be used by the recs +extern x86IntRegType g_x868bitregs[11]; +extern x86IntRegType g_x86non8bitregs[3]; + +#ifdef _MSC_VER +#define X86ARG1 RCX +#define X86ARG2 RDX +#define X86ARG3 R8 +#define X86ARG4 R9 +#else +#define 
X86ARG1 RDI +#define X86ARG2 RSI +#define X86ARG3 RDX +#define X86ARG4 RCX +#endif + +#else + +#define X86ARG1 EAX +#define X86ARG2 ECX +#define X86ARG3 EDX +#define X86ARG4 EBX + +#endif // __x86_64__ + +#define MM0 0 +#define MM1 1 +#define MM2 2 +#define MM3 3 +#define MM4 4 +#define MM5 5 +#define MM6 6 +#define MM7 7 + +typedef int x86MMXRegType; + +#define XMM0 0 +#define XMM1 1 +#define XMM2 2 +#define XMM3 3 +#define XMM4 4 +#define XMM5 5 +#define XMM6 6 +#define XMM7 7 +#define XMM8 8 +#define XMM9 9 +#define XMM10 10 +#define XMM11 11 +#define XMM12 12 +#define XMM13 13 +#define XMM14 14 +#define XMM15 15 + +typedef int x86SSERegType; + +typedef enum +{ + XMMT_INT = 0, // integer (sse2 only) + XMMT_FPS = 1, // floating point + //XMMT_FPD = 3, // double +} XMMSSEType; + +extern XMMSSEType g_xmmtypes[XMMREGS]; + +void cpudetectInit( void );//this is all that needs to be called and will fill up the below structs + +//cpu capabilities structure +typedef struct { + u32 hasFloatingPointUnit; + u32 hasVirtual8086ModeEnhancements; + u32 hasDebuggingExtensions; + u32 hasPageSizeExtensions; + u32 hasTimeStampCounter; + u32 hasModelSpecificRegisters; + u32 hasPhysicalAddressExtension; + u32 hasCOMPXCHG8BInstruction; + u32 hasAdvancedProgrammableInterruptController; + u32 hasSEPFastSystemCall; + u32 hasMemoryTypeRangeRegisters; + u32 hasPTEGlobalFlag; + u32 hasMachineCheckArchitecture; + u32 hasConditionalMoveAndCompareInstructions; + u32 hasFGPageAttributeTable; + u32 has36bitPageSizeExtension; + u32 hasProcessorSerialNumber; + u32 hasCFLUSHInstruction; + u32 hasDebugStore; + u32 hasACPIThermalMonitorAndClockControl; + u32 hasMultimediaExtensions; + u32 hasFastStreamingSIMDExtensionsSaveRestore; + u32 hasStreamingSIMDExtensions; + u32 hasStreamingSIMD2Extensions; + u32 hasSelfSnoop; + u32 hasHyperThreading; + u32 hasThermalMonitor; + u32 hasIntel64BitArchitecture; + u32 hasStreamingSIMD3Extensions; + u32 hasStreamingSIMD4Extensions; + //that is only for AMDs + u32 
hasMultimediaExtensionsExt; + u32 hasAMD64BitArchitecture; + u32 has3DNOWInstructionExtensionsExt; + u32 has3DNOWInstructionExtensions; +} CAPABILITIES; + +extern CAPABILITIES cpucaps; + +typedef struct { + + u32 x86Family; // Processor Family + u32 x86Model; // Processor Model + u32 x86PType; // Processor Type + u32 x86StepID; // Stepping ID + u32 x86Flags; // Feature Flags + u32 x86Flags2; // More Feature Flags + u32 x86EFlags; // Extended Feature Flags + //all the above returns hex values + s8 x86ID[16]; // Vendor ID //the vendor creator (in %s) + s8 x86Type[20]; //cpu type in char format //the cpu type (in %s) + s8 x86Fam[50]; // family in char format //the original cpu name string (in %s) + u32 cpuspeed; // speed of cpu //this will give cpu speed (in %d) +} CPUINFO; + +extern CPUINFO cpuinfo; + +extern s8 *x86Ptr; +extern u8 *j8Ptr[32]; +extern u32 *j32Ptr[32]; + + +#ifdef __x86_64__ +#define X86_64ASSERT() assert(0) +#define MEMADDR(addr, oplen) ((addr) - ((u64)x86Ptr + ((u64)oplen))) +#else +#define X86_64ASSERT() +#define MEMADDR(addr, oplen) (addr) +#endif + +#ifdef __x86_64__ +#define Rex( w, r, x, b ) write8( 0x40 | ((w) << 3) | ((r) << 2) | ((x) << 1) | (b) ); +#define RexR(w, reg) if( w||(reg)>=8 ) { Rex(w, (reg)>=8, 0, 0); } +#define RexB(w, base) if( w||(base)>=8 ) { Rex(w, 0, 0, (base)>=8); } +#define RexRB(w, reg, base) if( w || (reg) >= 8 || (base)>=8 ) { Rex(w, (reg)>=8, 0, (base)>=8); } +#define RexRXB(w, reg, index, base) if( w||(reg) >= 8 || (index) >= 8 || (base) >= 8 ) { \ + Rex(w, (reg)>=8, (index)>=8, (base)>=8); \ + } +#else +#define Rex(w,r,x,b) assert(0); +#define RexR(w, reg) if( w||(reg)>=8 ) assert(0); +#define RexB(w, base) if( w||(base)>=8 ) assert(0); +#define RexRB(w, reg, base) if( w||(reg) >= 8 || (base)>=8 ) assert(0); +#define RexRXB(w, reg, index, base) if( w||(reg) >= 8 || (index) >= 8 || (base) >= 8 ) assert(0); +#endif + +void write8( int val ); +void write16( int val ); +void write32( u32 val ); +void write64( u64 val ); 
+ + +void x86SetPtr( char *ptr ); +void x86Shutdown( void ); + +void x86SetJ8( u8 *j8 ); +void x86SetJ8A( u8 *j8 ); +void x86SetJ16( u16 *j16 ); +void x86SetJ16A( u16 *j16 ); +void x86SetJ32( u32 *j32 ); +void x86SetJ32A( u32 *j32 ); + +void x86Align( int bytes ); +u64 GetCPUTick( void ); + +// General Helper functions +void ModRM( int mod, int rm, int reg ); +void SibSB( int ss, int rm, int index ); +void SET8R( int cc, int to ); +u8* J8Rel( int cc, int to ); +u32* J32Rel( int cc, u32 to ); +void CMOV32RtoR( int cc, int to, int from ); +void CMOV32MtoR( int cc, int to, uptr from ); + +//****************** +// IX86 intructions +//****************** + +// +// * scale values: +// * 0 - *1 +// * 1 - *2 +// * 2 - *4 +// * 3 - *8 +// + +void STC( void ); +void CLC( void ); + +//////////////////////////////////// +// mov instructions // +//////////////////////////////////// + +// mov r64 to r64 +void MOV64RtoR( x86IntRegType to, x86IntRegType from ); +// mov r64 to m64 +void MOV64RtoM( uptr to, x86IntRegType from ); +// mov m64 to r64 +void MOV64MtoR( x86IntRegType to, uptr from ); +// mov sign ext imm32 to m64 +void MOV64I32toM( uptr to, u32 from ); +// mov sign ext imm32 to r64 +void MOV64I32toR( x86IntRegType to, s32 from); +// mov imm64 to r64 +void MOV64ItoR( x86IntRegType to, u64 from); +// mov imm64 to [r64+off] +void MOV64ItoRmOffset( x86IntRegType to, u32 from, int offset); +// mov [r64+offset] to r64 +void MOV64RmOffsettoR( x86IntRegType to, x86IntRegType from, int offset ); +// mov [r64][r64*scale] to r64 +void MOV64RmStoR( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); +// mov r64 to [r64+offset] +void MOV64RtoRmOffset( x86IntRegType to, x86IntRegType from, int offset ); +// mov r64 to [r64][r64*scale] +void MOV64RtoRmS( x86IntRegType to, x86IntRegType from, x86IntRegType from2, int scale); + +// mov r32 to r32 +void MOV32RtoR( x86IntRegType to, x86IntRegType from ); +// mov r32 to m32 +void MOV32RtoM( uptr to, x86IntRegType from ); 
+// mov m32 to r32 +void MOV32MtoR( x86IntRegType to, uptr from ); +// mov [r32] to r32 +void MOV32RmtoR( x86IntRegType to, x86IntRegType from ); +void MOV32RmtoROffset( x86IntRegType to, x86IntRegType from, int offset ); +// mov [r32][r32< subtract ST(0) from ST(1), store in ST(1) and POP stack +void FSUBP( void ); +// fmul ST(src) to fpu reg stack ST(0) +void FMUL32Rto0( x86IntRegType src ); +// fmul ST(0) to fpu reg stack ST(src) +void FMUL320toR( x86IntRegType src ); +// fdiv ST(src) to fpu reg stack ST(0) +void FDIV32Rto0( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src) +void FDIV320toR( x86IntRegType src ); +// fdiv ST(0) to fpu reg stack ST(src), pop stack, store in ST(src) +void FDIV320toRP( x86IntRegType src ); + +// fadd m32 to fpu reg stack +void FADD32( u32 from ); +// fsub m32 to fpu reg stack +void FSUB32( u32 from ); +// fmul m32 to fpu reg stack +void FMUL32( u32 from ); +// fdiv m32 to fpu reg stack +void FDIV32( u32 from ); +// fcomi st, st( i) +void FCOMI( x86IntRegType src ); +// fcomip st, st( i) +void FCOMIP( x86IntRegType src ); +// fucomi st, st( i) +void FUCOMI( x86IntRegType src ); +// fucomip st, st( i) +void FUCOMIP( x86IntRegType src ); +// fcom m32 to fpu reg stack +void FCOM32( u32 from ); +// fabs fpu reg stack +void FABS( void ); +// fsqrt fpu reg stack +void FSQRT( void ); +// ftan fpu reg stack +void FPATAN( void ); +// fsin fpu reg stack +void FSIN( void ); +// fchs fpu reg stack +void FCHS( void ); + +// fcmovb fpu reg to fpu reg stack +void FCMOVB32( x86IntRegType from ); +// fcmove fpu reg to fpu reg stack +void FCMOVE32( x86IntRegType from ); +// fcmovbe fpu reg to fpu reg stack +void FCMOVBE32( x86IntRegType from ); +// fcmovu fpu reg to fpu reg stack +void FCMOVU32( x86IntRegType from ); +// fcmovnb fpu reg to fpu reg stack +void FCMOVNB32( x86IntRegType from ); +// fcmovne fpu reg to fpu reg stack +void FCMOVNE32( x86IntRegType from ); +// fcmovnbe fpu reg to fpu reg stack +void FCMOVNBE32( x86IntRegType from 
); +// fcmovnu fpu reg to fpu reg stack +void FCMOVNU32( x86IntRegType from ); +void FCOMP32( u32 from ); +void FNSTSWtoAX( void ); + +// probably a little extreme here, but x86-64 should NOT use MMX +#ifdef __x86_64__ + +#define MMXONLY(code) + +#else + +#define MMXONLY(code) code + +//****************** +// MMX instructions +//****************** + +// r64 = mm + +// movq m64 to r64 +void MOVQMtoR( x86MMXRegType to, uptr from ); +// movq r64 to m64 +void MOVQRtoM( uptr to, x86MMXRegType from ); + +// pand r64 to r64 +void PANDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pand m64 to r64 ; +void PANDMtoR( x86MMXRegType to, uptr from ); +// pandn r64 to r64 +void PANDNRtoR( x86MMXRegType to, x86MMXRegType from ); +// pandn r64 to r64 +void PANDNMtoR( x86MMXRegType to, uptr from ); +// por r64 to r64 +void PORRtoR( x86MMXRegType to, x86MMXRegType from ); +// por m64 to r64 +void PORMtoR( x86MMXRegType to, uptr from ); +// pxor r64 to r64 +void PXORRtoR( x86MMXRegType to, x86MMXRegType from ); +// pxor m64 to r64 +void PXORMtoR( x86MMXRegType to, uptr from ); + +// psllq r64 to r64 +void PSLLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psllq m64 to r64 +void PSLLQMtoR( x86MMXRegType to, uptr from ); +// psllq imm8 to r64 +void PSLLQItoR( x86MMXRegType to, u8 from ); +// psrlq r64 to r64 +void PSRLQRtoR( x86MMXRegType to, x86MMXRegType from ); +// psrlq m64 to r64 +void PSRLQMtoR( x86MMXRegType to, uptr from ); +// psrlq imm8 to r64 +void PSRLQItoR( x86MMXRegType to, u8 from ); + +// paddusb r64 to r64 +void PADDUSBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusb m64 to r64 +void PADDUSBMtoR( x86MMXRegType to, uptr from ); +// paddusw r64 to r64 +void PADDUSWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddusw m64 to r64 +void PADDUSWMtoR( x86MMXRegType to, uptr from ); + +// paddb r64 to r64 +void PADDBRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddb m64 to r64 +void PADDBMtoR( 
x86MMXRegType to, uptr from ); +// paddw r64 to r64 +void PADDWRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddw m64 to r64 +void PADDWMtoR( x86MMXRegType to, uptr from ); +// paddd r64 to r64 +void PADDDRtoR( x86MMXRegType to, x86MMXRegType from ); +// paddd m64 to r64 +void PADDDMtoR( x86MMXRegType to, uptr from ); +void PADDSBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PADDSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +// paddq m64 to r64 (sse2 only?) +void PADDQMtoR( x86MMXRegType to, uptr from ); +// paddq r64 to r64 (sse2 only?) +void PADDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +void PSUBSBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBSWRtoR( x86MMXRegType to, x86MMXRegType from ); + +void PSUBBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBWRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSUBDMtoR( x86MMXRegType to, uptr from ); + +// psubq m64 to r64 (sse2 only?) +void PSUBQMtoR( x86MMXRegType to, uptr from ); +// psubq r64 to r64 (sse2 only?) +void PSUBQRtoR( x86MMXRegType to, x86MMXRegType from ); + +// pmuludq m64 to r64 (sse2 only?) +void PMULUDQMtoR( x86MMXRegType to, uptr from ); +// pmuludq r64 to r64 (sse2 only?) 
+void PMULUDQRtoR( x86MMXRegType to, x86MMXRegType from ); + +void PCMPEQBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPEQWRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPEQDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPEQDMtoR( x86MMXRegType to, uptr from ); +void PCMPGTBRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPGTWRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPGTDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PCMPGTDMtoR( x86MMXRegType to, uptr from ); +void PSRLWItoR( x86MMXRegType to, u8 from ); +void PSRLDItoR( x86MMXRegType to, u8 from ); +void PSRLDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSLLWItoR( x86MMXRegType to, u8 from ); +void PSLLDItoR( x86MMXRegType to, u8 from ); +void PSLLDRtoR( x86MMXRegType to, x86MMXRegType from ); +void PSRAWItoR( x86MMXRegType to, u8 from ); +void PSRADItoR( x86MMXRegType to, u8 from ); +void PSRADRtoR( x86MMXRegType to, x86MMXRegType from ); +void PUNPCKLDQRtoR( x86MMXRegType to, x86MMXRegType from ); +void PUNPCKLDQMtoR( x86MMXRegType to, uptr from ); +void PUNPCKHDQRtoR( x86MMXRegType to, x86MMXRegType from ); +void PUNPCKHDQMtoR( x86MMXRegType to, uptr from ); +void MOVQ64ItoR( x86MMXRegType reg, u64 i ); //Prototype.Todo add all consts to end of block.not after jr $+8 +void MOVQRtoR( x86MMXRegType to, x86MMXRegType from ); +void MOVQRmtoROffset( x86MMXRegType to, x86IntRegType from, u32 offset ); +void MOVQRtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset ); +void MOVDMtoMMX( x86MMXRegType to, uptr from ); +void MOVDMMXtoM( uptr to, x86MMXRegType from ); +void MOVD32RtoMMX( x86MMXRegType to, x86IntRegType from ); +void MOVD32RmtoMMX( x86MMXRegType to, x86IntRegType from ); +void MOVD32RmOffsettoMMX( x86MMXRegType to, x86IntRegType from, u32 offset ); +void MOVD32MMXtoR( x86IntRegType to, x86MMXRegType from ); +void MOVD32MMXtoRm( x86IntRegType to, x86MMXRegType from ); +void MOVD32MMXtoRmOffset( x86IntRegType to, x86MMXRegType from, u32 offset 
); +void PINSRWRtoMMX( x86MMXRegType to, x86SSERegType from, u8 imm8 ); +void PSHUFWRtoR(x86MMXRegType to, x86MMXRegType from, u8 imm8); +void PSHUFWMtoR(x86MMXRegType to, uptr from, u8 imm8); +void MASKMOVQRtoR(x86MMXRegType to, x86MMXRegType from); + +// emms +void EMMS( void ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word 64bits +//********************************************************************************** +void PACKSSWBMMXtoMMX(x86MMXRegType to, x86MMXRegType from); +void PACKSSDWMMXtoMMX(x86MMXRegType to, x86MMXRegType from); + +void PMOVMSKBMMXtoR(x86IntRegType to, x86MMXRegType from); + +void SSE2_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +void SSE2_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); + +#endif // !__x86_64__ + +//********************* +// SSE instructions * +//********************* +void SSE_MOVAPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVAPS_XMM_to_M128( uptr to, x86SSERegType from ); +void SSE_MOVAPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_MOVUPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVUPS_XMM_to_M128( uptr to, x86SSERegType from ); + +void SSE_MOVSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVSS_XMM_to_M32( u32 to, x86SSERegType from ); +void SSE_MOVSS_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +void SSE_MOVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MOVSS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVSS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSE2_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE2_MOVQ_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE2_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_MOVQ_XMM_to_M64( u32 to, x86SSERegType from ); + +void SSE_MASKMOVDQU_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ); + +void SSE_MOVLPS_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVLPS_XMM_to_M64( u32 to, x86SSERegType from ); +void SSE_MOVLPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVLPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSE_MOVHPS_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE_MOVHPS_XMM_to_M64( u32 to, x86SSERegType from ); +void SSE_MOVHPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVHPS_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSE_MOVLHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MOVLPSRmtoR( x86SSERegType to, x86IntRegType from ); +void SSE_MOVLPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVLPSRtoRm( x86SSERegType to, x86IntRegType from ); +void SSE_MOVLPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ); + +void SSE_MOVAPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVAPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVAPSRtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); +void SSE_MOVAPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVUPSRmStoR( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVUPSRtoRmS( x86SSERegType to, x86IntRegType from, x86IntRegType from2, int scale ); +void SSE_MOVUPSRtoRm( x86IntRegType to, x86IntRegType from ); +void SSE_MOVUPSRmtoR( x86IntRegType to, x86IntRegType from ); + +void SSE_MOVUPSRmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSE_MOVUPSRtoRmOffset( x86SSERegType to, x86IntRegType from, int offset ); + +void SSE2_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); +void SSE2_MOVDQARmtoROffset( 
x86SSERegType to, x86IntRegType from, int offset ); + +void SSE_RCPPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RCPPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_RCPSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RCPSS_M32_to_XMM( x86SSERegType to, uptr from ); + +void SSE_ORPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ANDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ANDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ANDNPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ANDNPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ADDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_ADDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_ADDSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_ADDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SUBPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_SUBPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SUBSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_SUBSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MULPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MULPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MULSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_MULSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPEQSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPEQSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLESS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void 
SSE_CMPUNORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPUNORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNESS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLESS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLESS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPORDSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPORDSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_UCOMISS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_UCOMISS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +#ifndef __x86_64__ +void SSE_PMAXSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +void SSE_PMINSW_MM_to_MM( x86MMXRegType to, x86MMXRegType from ); +void SSE_CVTPI2PS_MM_to_XMM( x86SSERegType to, x86MMXRegType from ); +void SSE_CVTPS2PI_M64_to_MM( x86MMXRegType to, uptr from ); +void SSE_CVTPS2PI_XMM_to_MM( x86MMXRegType to, x86SSERegType from ); +#endif +void SSE_CVTPI2PS_M64_to_XMM( x86SSERegType to, uptr from ); +void SSE_CVTTSS2SI_M32_to_R32(x86IntRegType to, uptr from); +void SSE_CVTTSS2SI_XMM_to_R32(x86IntRegType to, x86SSERegType from); +void SSE_CVTSI2SS_M32_to_XMM(x86SSERegType to, uptr from); +void SSE_CVTSI2SS_R_to_XMM(x86SSERegType to, x86IntRegType from); + +void SSE2_CVTDQ2PS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_CVTDQ2PS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_CVTPS2DQ_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_CVTTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSE_MAXPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MAXPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MAXSS_M32_to_XMM( x86SSERegType 
to, uptr from ); +void SSE_MAXSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MINPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_MINPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_MINSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_MINSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RSQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_RSQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_RSQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_RSQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SQRTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_SQRTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SQRTSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_SQRTSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_UNPCKLPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_UNPCKLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_UNPCKHPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_UNPCKHPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_SHUFPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE_SHUFPS_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +void SSE_SHUFPS_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset, u8 imm8 ); +void SSE_CMPEQPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPEQPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPUNORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPUNORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNEPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNEPS_XMM_to_XMM( 
x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLTPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLTPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPNLEPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPNLEPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_CMPORDPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_CMPORDPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_DIVPS_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE_DIVPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE_DIVSS_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE_DIVSS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +// VectorPath +void SSE2_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2_PSHUFD_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +void SSE2_PSHUFLW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2_PSHUFLW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); +void SSE2_PSHUFHW_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2_PSHUFHW_M128_to_XMM( x86SSERegType to, uptr from, u8 imm8 ); + +void SSE_STMXCSR( uptr from ); +void SSE_LDMXCSR( uptr from ); + + +//********************* +// SSE 2 Instructions* +//********************* +void SSE2_MOVDQA_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from); +void SSE2_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +void SSE2_MOVDQU_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from); +void SSE2_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from); + +void SSE2_PSRLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRLW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRLW_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRLD_M128_to_XMM(x86SSERegType to, uptr 
from); +void SSE2_PSRLD_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRLQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRAW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRAW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRAW_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSRAD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSRAD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSRAD_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSLLW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSLLW_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSLLD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSLLD_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PSLLQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PSLLQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PSLLDQ_I8_to_XMM(x86SSERegType to, u8 imm8); +void SSE2_PMAXSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMAXSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PMAXUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMAXUB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PMINSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMINSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PMINUB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PMINUB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PADDSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void 
SSE2_PADDSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBUSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PSUBUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBUSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PADDUSB_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PADDUSB_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDUSW_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2_PADDUSW_M128_to_XMM( x86SSERegType to, uptr from ); +void SSE2_PADDB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PADDD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDD_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PADDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PADDQ_M128_to_XMM(x86SSERegType to, uptr from ); + +//**********************************************************************************/ +//PACKSSWB,PACKSSDW: Pack Saturate Signed Word +//********************************************************************************** +void 
SSE2_PACKSSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PACKSSWB_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PACKSSDW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PACKSSDW_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PACKUSWB_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PACKUSWB_M128_to_XMM(x86SSERegType to, uptr from); + +//**********************************************************************************/ +//PUNPCKHWD: Unpack 16bit high +//********************************************************************************** +void SSE2_PUNPCKLBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLBW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PUNPCKHBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHBW_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKLWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLWD_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PUNPCKHWD_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHWD_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKLQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKLQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE2_PUNPCKHQDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PUNPCKHQDQ_M128_to_XMM(x86SSERegType to, uptr from); + +// mult by half words +void SSE2_PMULLW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PMULLW_M128_to_XMM(x86SSERegType to, uptr from); +void SSE2_PMULHW_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PMULHW_M128_to_XMM(x86SSERegType to, uptr from); + +void 
SSE2_PMULUDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE2_PMULUDQ_M128_to_XMM(x86SSERegType to, uptr from); + + +//**********************************************************************************/ +//PMOVMSKB: Create 16bit mask from signs of 8bit integers +//********************************************************************************** +void SSE2_PMOVMSKB_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +void SSE_MOVMSKPS_XMM_to_R32(x86IntRegType to, x86SSERegType from); +void SSE2_MOVMSKPD_XMM_to_R32(x86IntRegType to, x86SSERegType from); + +//**********************************************************************************/ +//PEXTRW,PINSRW: Packed Extract/Insert Word * +//********************************************************************************** +void SSE_PEXTRW_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8 ); +void SSE_PINSRW_R32_to_XMM(x86SSERegType from, x86IntRegType to, u8 imm8 ); + + +//**********************************************************************************/ +//PSUBx: Subtract Packed Integers * +//********************************************************************************** +void SSE2_PSUBB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PSUBW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PSUBD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBD_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PSUBQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PSUBQ_M128_to_XMM(x86SSERegType to, uptr from ); +/////////////////////////////////////////////////////////////////////////////////////// +//**********************************************************************************/ +//PCMPxx: Compare Packed Integers * +//********************************************************************************** +void 
SSE2_PCMPGTB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPGTB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPGTW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPGTW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPGTD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPGTD_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPEQB_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPEQB_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPEQW_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPEQW_M128_to_XMM(x86SSERegType to, uptr from ); +void SSE2_PCMPEQD_XMM_to_XMM(x86SSERegType to, x86SSERegType from ); +void SSE2_PCMPEQD_M128_to_XMM(x86SSERegType to, uptr from ); +//**********************************************************************************/ +//MOVD: Move Dword(32bit) to /from XMM reg * +//********************************************************************************** +void SSE2_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +void SSE2_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); +void SSE2_MOVD_Rm_to_XMM( x86SSERegType to, x86IntRegType from ); +void SSE2_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE2_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +void SSE2_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +void SSE2_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +void SSE2_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +#ifdef __x86_64__ +void SSE2_MOVQ_XMM_to_R( x86IntRegType to, x86SSERegType from ); +void SSE2_MOVQ_R_to_XMM( x86SSERegType to, x86IntRegType from ); +#endif + +//**********************************************************************************/ +//POR : SSE Bitwise OR * +//********************************************************************************** +void SSE2_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void 
SSE2_POR_M128_to_XMM( x86SSERegType to, uptr from ); + +void SSE3_HADDPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE3_HADDPS_M128_to_XMM(x86SSERegType to, uptr from); + +void SSE3_MOVSLDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from); +void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from); + +// SSE4.1 + +#ifndef _MM_MK_INSERTPS_NDX +#define _MM_MK_INSERTPS_NDX(srcField, dstField, zeroMask) (((srcField)<<6) | ((dstField)<<4) | (zeroMask)) +#endif + +void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8); +void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8); +void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8); + +//********************* +// SSE-X - uses both SSE,SSE2 code and tries to keep consistencies between the data +// Uses g_xmmtypes to infer the correct type. 
+//********************* +void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_MOVDQA_XMM_to_M128( uptr to, x86SSERegType from ); +void SSEX_MOVDQA_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSEX_MOVDQARmtoROffset( x86SSERegType to, x86IntRegType from, int offset ); +void SSEX_MOVDQARtoRmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSEX_MOVDQU_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_MOVDQU_XMM_to_M128( uptr to, x86SSERegType from ); +void SSEX_MOVDQU_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSEX_MOVD_M32_to_XMM( x86SSERegType to, uptr from ); +void SSEX_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +void SSEX_MOVD_XMM_to_Rm( x86IntRegType to, x86SSERegType from ); +void SSEX_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSEX_MOVD_XMM_to_RmOffset( x86IntRegType to, x86SSERegType from, int offset ); + +void SSEX_POR_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_POR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSEX_PXOR_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_PXOR_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSEX_PAND_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_PAND_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSEX_PANDN_M128_to_XMM( x86SSERegType to, uptr from ); +void SSEX_PANDN_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +void SSEX_PUNPCKLDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSEX_PUNPCKLDQ_M128_to_XMM(x86SSERegType to, uptr from); +void SSEX_PUNPCKHDQ_XMM_to_XMM(x86SSERegType to, x86SSERegType from); +void SSEX_PUNPCKHDQ_M128_to_XMM(x86SSERegType to, uptr from); + +void SSEX_MOVHLPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); + +//********************* +// 3DNOW instructions * +//********************* +void FEMMS( void ); +void PFCMPEQMtoR( x86IntRegType to, uptr from ); +void PFCMPGTMtoR( x86IntRegType to, uptr from ); +void 
PFCMPGEMtoR( x86IntRegType to, uptr from ); +void PFADDMtoR( x86IntRegType to, uptr from ); +void PFADDRtoR( x86IntRegType to, x86IntRegType from ); +void PFSUBMtoR( x86IntRegType to, uptr from ); +void PFSUBRtoR( x86IntRegType to, x86IntRegType from ); +void PFMULMtoR( x86IntRegType to, uptr from ); +void PFMULRtoR( x86IntRegType to, x86IntRegType from ); +void PFRCPMtoR( x86IntRegType to, uptr from ); +void PFRCPRtoR( x86IntRegType to, x86IntRegType from ); +void PFRCPIT1RtoR( x86IntRegType to, x86IntRegType from ); +void PFRCPIT2RtoR( x86IntRegType to, x86IntRegType from ); +void PFRSQRTRtoR( x86IntRegType to, x86IntRegType from ); +void PFRSQIT1RtoR( x86IntRegType to, x86IntRegType from ); +void PF2IDMtoR( x86IntRegType to, uptr from ); +void PF2IDRtoR( x86IntRegType to, x86IntRegType from ); +void PI2FDMtoR( x86IntRegType to, uptr from ); +void PI2FDRtoR( x86IntRegType to, x86IntRegType from ); +void PFMAXMtoR( x86IntRegType to, uptr from ); +void PFMAXRtoR( x86IntRegType to, x86IntRegType from ); +void PFMINMtoR( x86IntRegType to, uptr from ); +void PFMINRtoR( x86IntRegType to, x86IntRegType from ); + +void SSE2EMU_MOVSD_XMM_to_XMM( x86SSERegType to, x86SSERegType from); +void SSE2EMU_MOVQ_M64_to_XMM( x86SSERegType to, uptr from); +void SSE2EMU_MOVQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from); +void SSE2EMU_MOVD_RmOffset_to_XMM( x86SSERegType to, x86IntRegType from, int offset ); +void SSE2EMU_MOVD_XMM_to_RmOffset(x86IntRegType to, x86SSERegType from, int offset ); + +#ifndef __x86_64__ +void SSE2EMU_MOVDQ2Q_XMM_to_MM( x86MMXRegType to, x86SSERegType from); +void SSE2EMU_MOVQ2DQ_MM_to_XMM( x86SSERegType to, x86MMXRegType from); +#endif + +/* SSE2 emulated functions for SSE CPU's by kekko*/ + +void SSE2EMU_PSHUFD_XMM_to_XMM( x86SSERegType to, x86SSERegType from, u8 imm8 ); +void SSE2EMU_MOVD_XMM_to_R( x86IntRegType to, x86SSERegType from ); +void SSE2EMU_CVTPS2DQ_XMM_to_XMM( x86SSERegType to, x86SSERegType from ); +void SSE2EMU_CVTDQ2PS_M128_to_XMM( 
x86SSERegType to, uptr from ); +void SSE2EMU_MOVD_XMM_to_M32( u32 to, x86SSERegType from ); +void SSE2EMU_MOVD_R_to_XMM( x86SSERegType to, x86IntRegType from ); + +//////////////////////////////////////////////////// +#ifdef _DEBUG +#define WRITECHECK() CheckX86Ptr() +#else +#define WRITECHECK() +#endif + +#define write8(val ) { \ + *(u8*)x86Ptr = (u8)val; \ + x86Ptr++; \ +} \ + +#define write16(val ) \ +{ \ + *(u16*)x86Ptr = (u16)val; \ + x86Ptr += 2; \ +} \ + +#define write24(val ) \ +{ \ + *(u8*)x86Ptr = (u8)(val & 0xff); \ + x86Ptr++; \ + *(u8*)x86Ptr = (u8)((val >> 8) & 0xff); \ + x86Ptr++; \ + *(u8*)x86Ptr = (u8)((val >> 16) & 0xff); \ + x86Ptr++; \ +} \ + +#define write32( val ) \ +{ \ + *(u32*)x86Ptr = val; \ + x86Ptr += 4; \ +} \ + +#ifdef __cplusplus +} +#endif + +#endif // __IX86_H__ diff --git a/pcsx2/x86/ix86/ix86_cpudetect.c b/pcsx2/x86/ix86/ix86_cpudetect.c index 49ffe7f..4e0b3c1 100644 --- a/pcsx2/x86/ix86/ix86_cpudetect.c +++ b/pcsx2/x86/ix86/ix86_cpudetect.c @@ -234,6 +234,7 @@ void cpudetectInit( void ) cpuinfo.x86PType = (regs[ 0 ] >> 12) & 0x3; x86_64_8BITBRANDID = regs[1] & 0xff; cpuinfo.x86Flags = regs[ 3 ]; + cpuinfo.x86Flags2 = regs[ 2 ]; } } if ( iCpuId( 0x80000000, regs ) != -1 ) @@ -302,6 +303,7 @@ void cpudetectInit( void ) cpucaps.hasFastStreamingSIMDExtensionsSaveRestore = ( cpuinfo.x86Flags >> 24 ) & 1; cpucaps.hasStreamingSIMDExtensions = ( cpuinfo.x86Flags >> 25 ) & 1; //sse cpucaps.hasStreamingSIMD2Extensions = ( cpuinfo.x86Flags >> 26 ) & 1; //sse2 + cpucaps.hasStreamingSIMD4Extensions = ( cpuinfo.x86Flags2 >> 19 ) & 1; //sse4.1 cpucaps.hasSelfSnoop = ( cpuinfo.x86Flags >> 27 ) & 1; cpucaps.hasHyperThreading = ( cpuinfo.x86Flags >> 28 ) & 1; cpucaps.hasThermalMonitor = ( cpuinfo.x86Flags >> 29 ) & 1; diff --git a/pcsx2/x86/ix86/ix86_sse.c b/pcsx2/x86/ix86/ix86_sse.c index f3dff33..99876f0 100644 --- a/pcsx2/x86/ix86/ix86_sse.c +++ b/pcsx2/x86/ix86/ix86_sse.c @@ -437,6 +437,7 @@ void SSE_ORPS_XMM_to_XMM( x86SSERegType to, 
x86SSERegType from ) { SSERtoR( 0x5 //**********************************************************************************/ //XORPS : Bitwise Logical XOR of Single-Precision FP Values * //********************************************************************************** + void SSE_XORPS_M128_to_XMM( x86SSERegType to, uptr from ) { SSEMtoR( 0x570f, 0 ); } void SSE_XORPS_XMM_to_XMM( x86SSERegType to, x86SSERegType from ) { SSERtoR( 0x570f ); } @@ -1149,6 +1150,45 @@ void SSE3_MOVSLDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEM void SSE3_MOVSHDUP_XMM_to_XMM(x86SSERegType to, x86SSERegType from) { write8(0xf3); SSERtoR(0x160f); } void SSE3_MOVSHDUP_M128_to_XMM(x86SSERegType to, uptr from) { write8(0xf3); SSEMtoR(0x160f, 0); } +// SSE4.1 + +void SSE4_DPPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + write24(0x403A0F); + ModRM(3, to, from); + write8(imm8); +} + +void SSE4_DPPS_M128_to_XMM(x86SSERegType to, uptr from, u8 imm8) +{ + const int overb = 0; // TODO: x64? + + write8(0x66); + write24(0x403A0F); + ModRM(0, to, DISP32); + write32(MEMADDR(from, 4 + overb)); + write8(imm8); +} + +void SSE4_INSERTPS_XMM_to_XMM(x86SSERegType to, x86SSERegType from, u8 imm8) +{ + write8(0x66); + RexRB(0, to, from); + write24(0x213A0F); + ModRM(3, to, from); + write8(imm8); +} + +void SSE4_EXTRACTPS_XMM_to_R32(x86IntRegType to, x86SSERegType from, u8 imm8) +{ + // EXTRACTPS r/m32, xmm, imm8: the XMM source goes in ModRM.reg, the r32 dest in ModRM.rm + write8(0x66); + RexRB(0, from, to); + write24(0x173A0F); + ModRM(3, from, to); + write8(imm8); +} + // SSE-X void SSEX_MOVDQA_M128_to_XMM( x86SSERegType to, uptr from ) {