mirror of
https://github.com/libretro/pcsx2.git
synced 2025-01-27 12:22:19 +00:00
Merged drk||Raziel's "BTS Manual Protection" enhancement for the vtlb into /trunk, and combined it with Pseudonim's "Manual Block Clear" enhancement for an ideal two-phase protection system.
Most things should be a bit faster with this new system. The system is more balanced than the previous one, in that it provides a better overall performance across most games, but some specific FMVs (like Disgaea 2's) will be a bit slower. On the other hand, others like DQ8 and Kingdom Hearts 2 FMVs get a big speedup. Almost all in-game stuff should be either the same or faster now. Set a bunch of ignores for TortoiseSVN users, as suggested in Issue 166. git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1083 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
commit
31f0be6eb8
@ -37,7 +37,7 @@ static const uint m_psxMemSize =
|
|||||||
void psxMemAlloc()
|
void psxMemAlloc()
|
||||||
{
|
{
|
||||||
if( m_psxAllMem == NULL )
|
if( m_psxAllMem == NULL )
|
||||||
m_psxAllMem = vtlb_malloc( m_psxMemSize, 4096, 0x21000000 );
|
m_psxAllMem = vtlb_malloc( m_psxMemSize, 4096 );
|
||||||
|
|
||||||
if( m_psxAllMem == NULL)
|
if( m_psxAllMem == NULL)
|
||||||
throw Exception::OutOfMemory( "psxMemAlloc > failed allocating memory for the IOP processor." );
|
throw Exception::OutOfMemory( "psxMemAlloc > failed allocating memory for the IOP processor." );
|
||||||
|
@ -618,7 +618,7 @@ static u8* m_psAllMem = NULL;
|
|||||||
void memAlloc()
|
void memAlloc()
|
||||||
{
|
{
|
||||||
if( m_psAllMem == NULL )
|
if( m_psAllMem == NULL )
|
||||||
m_psAllMem = vtlb_malloc( m_allMemSize, 4096, 0x2400000 );
|
m_psAllMem = vtlb_malloc( m_allMemSize, 4096 );
|
||||||
|
|
||||||
if( m_psAllMem == NULL)
|
if( m_psAllMem == NULL)
|
||||||
throw Exception::OutOfMemory( "memAlloc > failed to allocate PS2's base ram/rom/scratchpad." );
|
throw Exception::OutOfMemory( "memAlloc > failed to allocate PS2's base ram/rom/scratchpad." );
|
||||||
|
@ -76,6 +76,9 @@ int _SPR0chain()
|
|||||||
{
|
{
|
||||||
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
|
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
|
||||||
|
|
||||||
|
// Clear dependent EE recompiler blocks, if necessary [needed for BTS protection system]
|
||||||
|
Cpu->Clear( spr0->madr, spr0->qwc << 2 );
|
||||||
|
|
||||||
// clear VU mem also!
|
// clear VU mem also!
|
||||||
TestClearVUs(spr0->madr, spr0->qwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes)
|
TestClearVUs(spr0->madr, spr0->qwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes)
|
||||||
|
|
||||||
@ -121,6 +124,7 @@ void _SPR0interleave()
|
|||||||
{
|
{
|
||||||
// clear VU mem also!
|
// clear VU mem also!
|
||||||
TestClearVUs(spr0->madr, spr0->qwc << 2);
|
TestClearVUs(spr0->madr, spr0->qwc << 2);
|
||||||
|
Cpu->Clear( spr0->madr, spr0->qwc << 2 );
|
||||||
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
|
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
|
||||||
}
|
}
|
||||||
spr0->sadr += spr0->qwc * 16;
|
spr0->sadr += spr0->qwc * 16;
|
||||||
|
@ -168,6 +168,7 @@ bool SysAllocateMem()
|
|||||||
|
|
||||||
try
|
try
|
||||||
{
|
{
|
||||||
|
vtlb_Core_Alloc();
|
||||||
memAlloc();
|
memAlloc();
|
||||||
psxMemAlloc();
|
psxMemAlloc();
|
||||||
vuMicroMemAlloc();
|
vuMicroMemAlloc();
|
||||||
@ -271,6 +272,7 @@ void SysShutdownMem()
|
|||||||
vuMicroMemShutdown();
|
vuMicroMemShutdown();
|
||||||
psxMemShutdown();
|
psxMemShutdown();
|
||||||
memShutdown();
|
memShutdown();
|
||||||
|
vtlb_Core_Shutdown();
|
||||||
}
|
}
|
||||||
|
|
||||||
//////////////////////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
@ -83,7 +83,7 @@ static const uint m_vuMemSize =
|
|||||||
void vuMicroMemAlloc()
|
void vuMicroMemAlloc()
|
||||||
{
|
{
|
||||||
if( m_vuAllMem == NULL )
|
if( m_vuAllMem == NULL )
|
||||||
m_vuAllMem = vtlb_malloc( m_vuMemSize, 16, 0x28000000 );
|
m_vuAllMem = vtlb_malloc( m_vuMemSize, 16 );
|
||||||
|
|
||||||
if( m_vuAllMem == NULL )
|
if( m_vuAllMem == NULL )
|
||||||
throw Exception::OutOfMemory( "vuMicroMemInit > Failed to allocate VUmicro memory." );
|
throw Exception::OutOfMemory( "vuMicroMemInit > Failed to allocate VUmicro memory." );
|
||||||
|
@ -61,7 +61,6 @@ vtlbHandler UnmappedVirtHandler1;
|
|||||||
vtlbHandler UnmappedPhyHandler0;
|
vtlbHandler UnmappedPhyHandler0;
|
||||||
vtlbHandler UnmappedPhyHandler1;
|
vtlbHandler UnmappedPhyHandler1;
|
||||||
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
__asm
|
__asm
|
||||||
{
|
{
|
||||||
@ -87,10 +86,22 @@ callfunction:
|
|||||||
jmp [readfunctions8-0x800000+eax];
|
jmp [readfunctions8-0x800000+eax];
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Interpreter Implementations of VTLB Memory Operations.
|
// Interpreter Implementations of VTLB Memory Operations.
|
||||||
// See recVTLB.cpp for the dynarec versions.
|
// See recVTLB.cpp for the dynarec versions.
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
|
// Helper for the BTS manual protection system. Sets a bit based on the given address,
|
||||||
|
// marking that piece of PS2 memory as 'dirty.'
|
||||||
|
//
|
||||||
|
// Marks the 16-byte granule of the vtlb core allocation that contains 'ptr' as dirty.
// vtlbdata.alloc_bits keeps one bit per granule (eight granules per byte); the granule
// index is the byte offset of 'ptr' from vtlbdata.alloc_base divided by 16.
static void memwritebits(u8* ptr)
{
	const u32 granule = (u32)(ptr - vtlbdata.alloc_base) >> 4;	// byte offset / 16
	vtlbdata.alloc_bits[granule >> 3] |= 1 << (granule & 7);
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
|
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses
|
||||||
template<int DataSize,typename DataType>
|
template<int DataSize,typename DataType>
|
||||||
__forceinline DataType __fastcall MemOp_r0(u32 addr)
|
__forceinline DataType __fastcall MemOp_r0(u32 addr)
|
||||||
@ -117,6 +128,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
// Interpreted VTLB lookup for 64 and 128 bit accesses.
|
// Interpreted VTLB lookup for 64 and 128 bit accesses.
|
||||||
template<int DataSize,typename DataType>
|
template<int DataSize,typename DataType>
|
||||||
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
||||||
@ -148,6 +160,7 @@ __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
template<int DataSize,typename DataType>
|
template<int DataSize,typename DataType>
|
||||||
__forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
__forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
||||||
{
|
{
|
||||||
@ -155,6 +168,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
|||||||
s32 ppf=addr+vmv;
|
s32 ppf=addr+vmv;
|
||||||
if (!(ppf<0))
|
if (!(ppf<0))
|
||||||
{
|
{
|
||||||
|
memwritebits((u8*)ppf);
|
||||||
*reinterpret_cast<DataType*>(ppf)=data;
|
*reinterpret_cast<DataType*>(ppf)=data;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -174,6 +188,8 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ------------------------------------------------------------------------
|
||||||
template<int DataSize,typename DataType>
|
template<int DataSize,typename DataType>
|
||||||
__forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
__forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
||||||
{
|
{
|
||||||
@ -182,6 +198,7 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
|||||||
s32 ppf=addr+vmv;
|
s32 ppf=addr+vmv;
|
||||||
if (!(ppf<0))
|
if (!(ppf<0))
|
||||||
{
|
{
|
||||||
|
memwritebits((u8*)ppf);
|
||||||
*reinterpret_cast<DataType*>(ppf)=*data;
|
*reinterpret_cast<DataType*>(ppf)=*data;
|
||||||
if (DataSize==128)
|
if (DataSize==128)
|
||||||
*reinterpret_cast<DataType*>(ppf+8)=data[1];
|
*reinterpret_cast<DataType*>(ppf+8)=data[1];
|
||||||
@ -202,7 +219,6 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
mem8_t __fastcall vtlb_memRead8(u32 mem)
|
mem8_t __fastcall vtlb_memRead8(u32 mem)
|
||||||
{
|
{
|
||||||
return MemOp_r0<8,mem8_t>(mem);
|
return MemOp_r0<8,mem8_t>(mem);
|
||||||
@ -328,7 +344,7 @@ void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console::E
|
|||||||
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); }
|
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); }
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// VTLB Public API -- Init/Term/RegisterHandler stuff
|
// VTLB Public API -- Init/Term/RegisterHandler stuff
|
||||||
//
|
//
|
||||||
|
|
||||||
@ -361,6 +377,7 @@ vtlbHandler vtlb_RegisterHandler( vtlbMemR8FP* r8,vtlbMemR16FP* r16,vtlbMemR32FP
|
|||||||
return rv;
|
return rv;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region.
|
// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region.
|
||||||
// New mappings always assume priority over previous mappings, so place "generic" mappings for
|
// New mappings always assume priority over previous mappings, so place "generic" mappings for
|
||||||
// large areas of memory first, and then specialize specific small regions of memory afterward.
|
// large areas of memory first, and then specialize specific small regions of memory afterward.
|
||||||
@ -500,7 +517,8 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clears vtlb handlers and memory mappings.
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// vtlb_Init -- Clears vtlb handlers and memory mappings.
|
||||||
void vtlb_Init()
|
void vtlb_Init()
|
||||||
{
|
{
|
||||||
vtlbHandlerCount=0;
|
vtlbHandlerCount=0;
|
||||||
@ -540,7 +558,8 @@ void vtlb_Init()
|
|||||||
vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE);
|
vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Performs a COP0-level reset of the PS2's TLB.
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// vtlb_Reset -- Performs a COP0-level reset of the PS2's TLB.
|
||||||
// This function should probably be part of the COP0 rather than here in VTLB.
|
// This function should probably be part of the COP0 rather than here in VTLB.
|
||||||
void vtlb_Reset()
|
void vtlb_Reset()
|
||||||
{
|
{
|
||||||
@ -552,30 +571,65 @@ void vtlb_Term()
|
|||||||
//nothing to do for now
|
//nothing to do for now
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
// Reserves the vtlb core allocation used by various emulation components!
|
||||||
|
//
|
||||||
|
void vtlb_Core_Alloc()
|
||||||
|
{
|
||||||
|
if( vtlbdata.alloc_base != NULL ) return;
|
||||||
|
|
||||||
|
vtlbdata.alloc_current = 0;
|
||||||
|
|
||||||
|
#ifdef __LINUX__
|
||||||
|
vtlbdata.alloc_base = SysMmapEx( 0x16000000, VTLB_ALLOC_SIZE, 0x80000000, "Vtlb" );
|
||||||
|
#else
|
||||||
|
// Win32 just needs this, since malloc always maps below 2GB.
|
||||||
|
vtlbdata.alloc_base = (u8*)_aligned_malloc( VTLB_ALLOC_SIZE, 4096 );
|
||||||
|
if( vtlbdata.alloc_base == NULL )
|
||||||
|
throw Exception::OutOfMemory( "Fatal Error: could not allocate 42Meg buffer for PS2's mappable system ram." );
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
|
void vtlb_Core_Shutdown()
|
||||||
|
{
|
||||||
|
if( vtlbdata.alloc_base == NULL ) return;
|
||||||
|
|
||||||
|
#ifdef __LINUX__
|
||||||
|
SafeSysMunmap( vtlbdata.alloc_base, VTLB_ALLOC_SIZE );
|
||||||
|
#else
|
||||||
|
// Make sure and unprotect memory first, since CrtDebug will try to write to it.
|
||||||
|
HostSys::MemProtect( vtlbdata.alloc_base, VTLB_ALLOC_SIZE, Protect_ReadWrite );
|
||||||
|
safe_aligned_free( vtlbdata.alloc_base );
|
||||||
|
#endif
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// This function allocates memory blocks which are compatible with the Vtlb's requirements
|
// This function allocates memory blocks which are compatible with the Vtlb's requirements
|
||||||
// for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
|
// for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
|
||||||
// pointer to be cleared. Some operating systems and/or implementations of malloc do that,
|
// pointer to be cleared. Some operating systems and/or implementations of malloc do that,
|
||||||
// but others do not. So use this instead to allocate the memory correctly for your
|
// but others do not. So use this instead to allocate the memory correctly for your
|
||||||
// platform.
|
// platform.
|
||||||
// Carves 'size' bytes out of the vtlb core allocation (see vtlb_Core_Alloc), starting at
// the next offset that is a multiple of 'align'.  'align' must be a power of two.
//
// Returns a pointer into vtlbdata.alloc_base, or NULL when the core allocation has not
// been reserved yet or does not have 'size' bytes left.  Callers already treat NULL as an
// out-of-memory condition.
u8* vtlb_malloc( uint size, uint align )
{
	if( vtlbdata.alloc_base == NULL ) return NULL;

	// Round the current cursor up to the requested alignment.
	const uint start = ((uint)vtlbdata.alloc_current + (align-1)) & ~(align-1);

	// Refuse requests that would run past the end of the buffer.  The comparison is
	// written as a subtraction so that a huge 'size' cannot wrap around.
	if( start > VTLB_ALLOC_SIZE || size > VTLB_ALLOC_SIZE - start )
		return NULL;

	vtlbdata.alloc_current = (int)(start + size);
	return &vtlbdata.alloc_base[start];
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
|
//
|
||||||
// Placebo counterpart of vtlb_malloc.  Both arguments are ignored.
void vtlb_free( void* pmem, uint size )
{
	// Intentionally empty: allocation and release are now handled by vtlb_Core_Alloc /
	// vtlb_Core_Shutdown.  The function is kept in place in case it becomes useful
	// again at a later date.
}
|
||||||
|
11
pcsx2/vtlb.h
11
pcsx2/vtlb.h
@ -23,10 +23,12 @@ typedef void __fastcall vtlbMemW128FP(u32 addr,const mem128_t* data);
|
|||||||
|
|
||||||
typedef u32 vtlbHandler;
|
typedef u32 vtlbHandler;
|
||||||
|
|
||||||
|
extern void vtlb_Core_Alloc();
|
||||||
|
extern void vtlb_Core_Shutdown();
|
||||||
extern void vtlb_Init();
|
extern void vtlb_Init();
|
||||||
extern void vtlb_Reset();
|
extern void vtlb_Reset();
|
||||||
extern void vtlb_Term();
|
extern void vtlb_Term();
|
||||||
extern u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress );
|
extern u8* vtlb_malloc( uint size, uint align );
|
||||||
extern void vtlb_free( void* pmem, uint size );
|
extern void vtlb_free( void* pmem, uint size );
|
||||||
|
|
||||||
|
|
||||||
@ -67,6 +69,8 @@ extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );
|
|||||||
|
|
||||||
namespace vtlb_private
|
namespace vtlb_private
|
||||||
{
|
{
|
||||||
|
static const uint VTLB_ALLOC_SIZE = 0x2900000; //this is a bit more than required
|
||||||
|
|
||||||
static const uint VTLB_PAGE_BITS = 12;
|
static const uint VTLB_PAGE_BITS = 12;
|
||||||
static const uint VTLB_PAGE_MASK = 4095;
|
static const uint VTLB_PAGE_MASK = 4095;
|
||||||
static const uint VTLB_PAGE_SIZE = 4096;
|
static const uint VTLB_PAGE_SIZE = 4096;
|
||||||
@ -77,6 +81,11 @@ namespace vtlb_private
|
|||||||
|
|
||||||
struct MapData
|
struct MapData
|
||||||
{
|
{
|
||||||
|
u8 alloc_bits[VTLB_ALLOC_SIZE/16/8];
|
||||||
|
|
||||||
|
u8* alloc_base; //base of the memory array
|
||||||
|
int alloc_current; //current base
|
||||||
|
|
||||||
s32 pmap[VTLB_PMAP_ITEMS]; //512KB
|
s32 pmap[VTLB_PMAP_ITEMS]; //512KB
|
||||||
s32 vmap[VTLB_VMAP_ITEMS]; //4MB
|
s32 vmap[VTLB_VMAP_ITEMS]; //4MB
|
||||||
|
|
||||||
|
@ -2883,7 +2883,7 @@
|
|||||||
</Filter>
|
</Filter>
|
||||||
</Filter>
|
</Filter>
|
||||||
<Filter
|
<Filter
|
||||||
Name="Dynarec Emitter"
|
Name="x86Emitter"
|
||||||
>
|
>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\x86\ix86\ix86.cpp"
|
RelativePath="..\..\x86\ix86\ix86.cpp"
|
||||||
|
@ -202,10 +202,8 @@ void WinRun()
|
|||||||
_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
|
_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
|
||||||
|
|
||||||
|
|
||||||
#ifndef _DEBUG
|
|
||||||
if( Config.Profiler )
|
if( Config.Profiler )
|
||||||
ProfilerInit();
|
ProfilerInit();
|
||||||
#endif
|
|
||||||
|
|
||||||
InitCPUTicks();
|
InitCPUTicks();
|
||||||
|
|
||||||
@ -800,7 +798,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
|
|||||||
SaveConfig();
|
SaveConfig();
|
||||||
break;
|
break;
|
||||||
|
|
||||||
#ifndef _DEBUG
|
|
||||||
case ID_PROFILER:
|
case ID_PROFILER:
|
||||||
Config.Profiler = !Config.Profiler;
|
Config.Profiler = !Config.Profiler;
|
||||||
if( Config.Profiler )
|
if( Config.Profiler )
|
||||||
@ -815,7 +812,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
|
|||||||
}
|
}
|
||||||
SaveConfig();
|
SaveConfig();
|
||||||
break;
|
break;
|
||||||
#endif
|
|
||||||
|
|
||||||
default:
|
default:
|
||||||
if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
|
if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
|
||||||
@ -989,9 +985,7 @@ void CreateMainMenu() {
|
|||||||
ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
|
ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
|
||||||
ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
|
ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
|
||||||
ADDSEPARATOR(0);
|
ADDSEPARATOR(0);
|
||||||
#ifndef _DEBUG
|
|
||||||
ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
|
ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
|
||||||
#endif
|
|
||||||
ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
|
ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
|
||||||
ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
|
ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
|
||||||
ADDSEPARATOR(0);
|
ADDSEPARATOR(0);
|
||||||
|
@ -7,7 +7,8 @@
|
|||||||
//
|
//
|
||||||
// Generated from the TEXTINCLUDE 2 resource.
|
// Generated from the TEXTINCLUDE 2 resource.
|
||||||
//
|
//
|
||||||
#include "afxresmw.h"
|
#include "afxresmw.h"
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
#undef APSTUDIO_READONLY_SYMBOLS
|
#undef APSTUDIO_READONLY_SYMBOLS
|
||||||
|
|
||||||
@ -899,7 +900,8 @@ END
|
|||||||
//
|
//
|
||||||
// Generated from the TEXTINCLUDE 3 resource.
|
// Generated from the TEXTINCLUDE 3 resource.
|
||||||
//
|
//
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////////////
|
||||||
#endif // not APSTUDIO_INVOKED
|
#endif // not APSTUDIO_INVOKED
|
||||||
|
|
||||||
|
@ -418,6 +418,9 @@ static void recAlloc()
|
|||||||
x86FpuState = FPU_STATE;
|
x86FpuState = FPU_STATE;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
PCSX2_ALIGNED16( static u16 manual_page[Ps2MemSize::Base >> 12] );
|
||||||
|
PCSX2_ALIGNED16( static u8 manual_counter[Ps2MemSize::Base >> 12] );
|
||||||
|
|
||||||
////////////////////////////////////////////////////
|
////////////////////////////////////////////////////
|
||||||
void recResetEE( void )
|
void recResetEE( void )
|
||||||
{
|
{
|
||||||
@ -427,6 +430,8 @@ void recResetEE( void )
|
|||||||
|
|
||||||
memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3
|
memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3
|
||||||
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
|
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
|
||||||
|
memzero_obj( manual_page );
|
||||||
|
memzero_obj( manual_counter );
|
||||||
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
|
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
|
||||||
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
|
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
|
||||||
|
|
||||||
@ -720,7 +725,6 @@ static void ClearRecLUT(BASEBLOCK* base, int count)
|
|||||||
base[i].SetFnptr((uptr)JITCompile);
|
base[i].SetFnptr((uptr)JITCompile);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns the offset to the next instruction after any cleared memory
|
|
||||||
void recClear(u32 addr, u32 size)
|
void recClear(u32 addr, u32 size)
|
||||||
{
|
{
|
||||||
BASEBLOCKEX* pexblock;
|
BASEBLOCKEX* pexblock;
|
||||||
@ -1256,14 +1260,16 @@ void badespfn() {
|
|||||||
|
|
||||||
// Logs the request and clears the recompiled blocks covering [start, start+sz) through
// Cpu->Clear.  The manual-protection check emitted by the recompiler jumps here when it
// finds a dirty bit for the block's memory.
// NOTE(review): 'sz' is forwarded to Cpu->Clear unchanged; presumably a count of 32-bit
// words rather than bytes -- confirm against the emitting code.
void __fastcall dyna_block_discard(u32 start,u32 sz)
{
	DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz);
	Cpu->Clear( start, sz );
}
|
||||||
|
|
||||||
void __fastcall dyna_block_reset(u32 start,u32 sz)
|
|
||||||
|
void __fastcall dyna_page_reset(u32 start,u32 sz)
|
||||||
{
|
{
|
||||||
DevCon::WriteLn("dyna_block_reset %08X , count %d", params start,sz);
|
DevCon::WriteLn("dyna_page_reset .. start=%08X count=%d", params start,sz);
|
||||||
Cpu->Clear(start & ~0xfffUL, 0x400);
|
Cpu->Clear(start & ~0xfffUL, 0x400);
|
||||||
|
manual_counter[start >> 10]++;
|
||||||
mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL);
|
mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1490,98 +1496,6 @@ StartRecomp:
|
|||||||
// instruction being analyzed.
|
// instruction being analyzed.
|
||||||
if( usecop2 ) vucycle++;
|
if( usecop2 ) vucycle++;
|
||||||
|
|
||||||
// peephole optimizations //
|
|
||||||
#ifdef PCSX2_VM_COISSUE
|
|
||||||
if( i < s_nEndBlock-4 && recompileCodeSafe(i) ) {
|
|
||||||
u32 curcode = cpuRegs.code;
|
|
||||||
u32 nextcode = *(u32*)PSM(i+4);
|
|
||||||
if( _eeIsLoadStoreCoIssue(curcode, nextcode) && recBSC_co[curcode>>26] != NULL ) {
|
|
||||||
|
|
||||||
// rs has to be the same, and cannot be just written
|
|
||||||
if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_eeLoadWritesRs(curcode) ) {
|
|
||||||
|
|
||||||
if( _eeIsLoadStoreCoX(curcode) && ((nextcode>>16)&0x1f) != ((curcode>>21)&0x1f) ) {
|
|
||||||
// see how many stores there are
|
|
||||||
u32 j;
|
|
||||||
// use xmmregs since only supporting lwc1,lq,swc1,sq
|
|
||||||
for(j = i+8; j < s_nEndBlock && j < i+4*iREGCNT_XMM; j += 4 ) {
|
|
||||||
u32 nncode = *(u32*)PSM(j);
|
|
||||||
if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) ||
|
|
||||||
_eeLoadWritesRs(nncode))
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if( j > i+8 ) {
|
|
||||||
u32 num = (j-i)>>2; // number of stores that can coissue
|
|
||||||
assert( num <= iREGCNT_XMM );
|
|
||||||
|
|
||||||
g_pCurInstInfo[0].numpeeps = num-1;
|
|
||||||
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
|
|
||||||
|
|
||||||
while(i < j-4) {
|
|
||||||
g_pCurInstInfo++;
|
|
||||||
g_pCurInstInfo[0].info |= EEINSTINFO_NOREC;
|
|
||||||
i += 4;
|
|
||||||
}
|
|
||||||
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// fall through
|
|
||||||
}
|
|
||||||
|
|
||||||
// unaligned loadstores
|
|
||||||
|
|
||||||
// if LWL, check if LWR and that offsets are +3 away
|
|
||||||
switch(curcode >> 26) {
|
|
||||||
case 0x22: // LWL
|
|
||||||
if( (nextcode>>26) != 0x26 || ((s16)nextcode)+3 != (s16)curcode )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
case 0x26: // LWR
|
|
||||||
if( (nextcode>>26) != 0x22 || ((s16)nextcode) != (s16)curcode+3 )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 0x2a: // SWL
|
|
||||||
if( (nextcode>>26) != 0x2e || ((s16)nextcode)+3 != (s16)curcode )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
case 0x2e: // SWR
|
|
||||||
if( (nextcode>>26) != 0x2a || ((s16)nextcode) != (s16)curcode+3 )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 0x1a: // LDL
|
|
||||||
if( (nextcode>>26) != 0x1b || ((s16)nextcode)+7 != (s16)curcode )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
case 0x1b: // LWR
|
|
||||||
if( (nextcode>>26) != 0x1aa || ((s16)nextcode) != (s16)curcode+7 )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
|
|
||||||
case 0x2c: // SWL
|
|
||||||
if( (nextcode>>26) != 0x2d || ((s16)nextcode)+7 != (s16)curcode )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
case 0x2d: // SWR
|
|
||||||
if( (nextcode>>26) != 0x2c || ((s16)nextcode) != (s16)curcode+7 )
|
|
||||||
continue;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
// good enough
|
|
||||||
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
|
|
||||||
g_pCurInstInfo[0].numpeeps = 1;
|
|
||||||
g_pCurInstInfo[1].info |= EEINSTINFO_NOREC;
|
|
||||||
g_pCurInstInfo++;
|
|
||||||
i += 4;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#endif // end peephole
|
|
||||||
}
|
}
|
||||||
// This *is* important because g_pCurInstInfo is checked a bit later on and
|
// This *is* important because g_pCurInstInfo is checked a bit later on and
|
||||||
// if it's not equal to s_pInstCache it handles recompilation differently.
|
// if it's not equal to s_pInstCache it handles recompilation differently.
|
||||||
@ -1611,7 +1525,6 @@ StartRecomp:
|
|||||||
iDumpBlock(startpc, recPtr);
|
iDumpBlock(startpc, recPtr);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static u16 manual_page[Ps2MemSize::Base >> 12];
|
|
||||||
u32 sz=(s_nEndBlock-startpc)>>2;
|
u32 sz=(s_nEndBlock-startpc)>>2;
|
||||||
|
|
||||||
u32 inpage_ptr=HWADDR(startpc);
|
u32 inpage_ptr=HWADDR(startpc);
|
||||||
@ -1631,31 +1544,76 @@ StartRecomp:
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
// import the vtlbdata (alloc_bits and alloc_base and stuff):
|
||||||
|
using namespace vtlb_private;
|
||||||
|
|
||||||
MOV32ItoR(ECX, inpage_ptr);
|
MOV32ItoR(ECX, inpage_ptr);
|
||||||
MOV32ItoR(EDX, pgsz);
|
MOV32ItoR(EDX, pgsz);
|
||||||
|
|
||||||
|
u32 mask=0;
|
||||||
|
u32 writen=0;
|
||||||
|
u32 writen_start=0;
|
||||||
|
|
||||||
u32 lpc=inpage_ptr;
|
u32 lpc=inpage_ptr;
|
||||||
u32 stg=pgsz;
|
u32 stg=pgsz;
|
||||||
|
|
||||||
while(stg>0)
|
while(stg>0)
|
||||||
{
|
{
|
||||||
// was dyna_block_discard_recmem. See note in recResetEE for details.
|
u32 bit = (lpc>>4) & 7;
|
||||||
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc));
|
if (mask==0)
|
||||||
JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 ));
|
{
|
||||||
|
//writen=bit;
|
||||||
|
writen_start=(((u8*)PSM(lpc)-vtlbdata.alloc_base)>>4)/8;
|
||||||
|
}
|
||||||
|
mask |= 1 << bit;
|
||||||
|
|
||||||
stg-=4;
|
if (bit==31)
|
||||||
lpc+=4;
|
{
|
||||||
|
vtlbdata.alloc_bits[writen_start]&=~mask;
|
||||||
|
xTEST( ptr32[&vtlbdata.alloc_bits[writen_start]], mask ); // auto-optimizes to imm8 when applicable.
|
||||||
|
xJNZ( dyna_block_discard );
|
||||||
|
//SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
|
||||||
|
mask = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
//writen++;
|
||||||
|
|
||||||
|
if (stg<=16)
|
||||||
|
{
|
||||||
|
lpc += stg;
|
||||||
|
stg = 0;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
lpc += 16;
|
||||||
|
stg -= 16;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (startpc != 0x81fc0) {
|
|
||||||
|
if (mask)
|
||||||
|
{
|
||||||
|
vtlbdata.alloc_bits[writen_start] &= ~mask;
|
||||||
|
xTEST( ptr32[&vtlbdata.alloc_bits[writen_start]], mask ); // auto-optimizes to imm8 when applicable.
|
||||||
|
xJNZ( dyna_block_discard );
|
||||||
|
//SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
|
||||||
|
mask = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if( startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 4 )
|
||||||
|
{
|
||||||
|
// Commented out until we replace it with a smarter algo that only
|
||||||
|
// recompiles blocks a limited number of times.
|
||||||
|
|
||||||
xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1);
|
xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1);
|
||||||
xJC( dyna_block_reset );
|
xJC( dyna_page_reset );
|
||||||
}
|
}
|
||||||
|
|
||||||
DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params
|
DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params
|
||||||
startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4);
|
startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
inpage_ptr+=pgsz;
|
inpage_ptr += pgsz;
|
||||||
inpage_sz-=pgsz;
|
inpage_sz -= pgsz;
|
||||||
}
|
}
|
||||||
|
|
||||||
// finally recompile //
|
// finally recompile //
|
||||||
|
@ -23,31 +23,88 @@
|
|||||||
|
|
||||||
#include "iCore.h"
|
#include "iCore.h"
|
||||||
#include "iR5900.h"
|
#include "iR5900.h"
|
||||||
|
#include "ix86\ix86_internal.h"
|
||||||
|
|
||||||
using namespace vtlb_private;
|
using namespace vtlb_private;
|
||||||
using namespace x86Emitter;
|
using namespace x86Emitter;
|
||||||
|
|
||||||
// NOTICE: This function *destroys* EAX!!
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
// Moves 128 bits of memory from the source register ptr to the dest register ptr.
|
// iAllocRegSSE -- allocates an xmm register. If no xmm register is available, xmm0 is
|
||||||
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason)
|
// saved into g_globalXMMData and returned as a free register.
|
||||||
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm )
|
//
|
||||||
|
class iAllocRegSSE
|
||||||
{
|
{
|
||||||
// (this is one of my test cases for the new emitter --air)
|
protected:
|
||||||
|
xRegisterSSE m_reg;
|
||||||
|
bool m_free;
|
||||||
|
|
||||||
xAddressReg src( srcRm );
|
public:
|
||||||
xAddressReg dest( destRm );
|
iAllocRegSSE() :
|
||||||
|
m_reg( xmm0 ),
|
||||||
|
m_free( !!_hasFreeXMMreg() )
|
||||||
|
{
|
||||||
|
if( m_free )
|
||||||
|
m_reg = xRegisterSSE( _allocTempXMMreg( XMMT_INT, -1 ) );
|
||||||
|
else
|
||||||
|
xStoreReg( m_reg );
|
||||||
|
}
|
||||||
|
|
||||||
xMOV( eax, ptr[src] );
|
~iAllocRegSSE()
|
||||||
xMOV( ptr[dest], eax );
|
{
|
||||||
|
if( m_free )
|
||||||
|
_freeXMMreg( m_reg.Id );
|
||||||
|
else
|
||||||
|
xRestoreReg( m_reg );
|
||||||
|
}
|
||||||
|
|
||||||
|
operator xRegisterSSE() const { return m_reg; }
|
||||||
|
};
|
||||||
|
|
||||||
xMOV( eax, ptr[src+4] );
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
xMOV( ptr[dest+4], eax );
|
// Moves 128 bits from point B to point A, using SSE's MOVAPS (or MOVDQA).
|
||||||
|
// This instruction always uses an SSE register, even if all registers are allocated! It
|
||||||
|
// saves an SSE register to memory first, performs the copy, and restores the register.
|
||||||
|
//
|
||||||
|
void iMOV128_SSE( const ModSibBase& destRm, const ModSibBase& srcRm )
|
||||||
|
{
|
||||||
|
iAllocRegSSE reg;
|
||||||
|
xMOVDQA( reg, srcRm );
|
||||||
|
xMOVDQA( destRm, reg );
|
||||||
|
}
|
||||||
|
|
||||||
xMOV( eax, ptr[src+8] );
|
//////////////////////////////////////////////////////////////////////////////////////////
|
||||||
xMOV( ptr[dest+8], eax );
|
// Moves 64 bits of data from point B to point A, using either MMX, SSE, or x86 registers
|
||||||
|
// if neither MMX nor SSE is available to the task.
|
||||||
|
//
|
||||||
|
// Optimizations: This method uses MMX if the cpu is in MMX mode, or SSE if it's in FPU
|
||||||
|
// mode (saving on potential EMMS uses).
|
||||||
|
//
|
||||||
|
void iMOV64_Smart( const ModSibBase& destRm, const ModSibBase& srcRm )
|
||||||
|
{
|
||||||
|
if( (x86FpuState == FPU_STATE) && _hasFreeXMMreg() )
|
||||||
|
{
|
||||||
|
// Move things using MOVLPS:
|
||||||
|
xRegisterSSE reg( _allocTempXMMreg( XMMT_INT, -1 ) );
|
||||||
|
xMOVL.PS( reg, srcRm );
|
||||||
|
xMOVL.PS( destRm, reg );
|
||||||
|
_freeXMMreg( reg.Id );
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
xMOV( eax, ptr[src+12] );
|
if( _hasFreeMMXreg() )
|
||||||
xMOV( ptr[dest+12], eax );
|
{
|
||||||
|
xRegisterMMX reg( _allocMMXreg(-1, MMX_TEMP, 0) );
|
||||||
|
xMOVQ( reg, srcRm );
|
||||||
|
xMOVQ( destRm, reg );
|
||||||
|
_freeMMXreg( reg.Id );
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
xMOV( eax, srcRm );
|
||||||
|
xMOV( destRm, eax );
|
||||||
|
xMOV( eax, srcRm+4 );
|
||||||
|
xMOV( destRm+4, eax );
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -127,38 +184,11 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 64:
|
case 64:
|
||||||
if( _hasFreeMMXreg() )
|
iMOV64_Smart(ptr[edx],ptr[ecx]);
|
||||||
{
|
|
||||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
|
||||||
MOVQRmtoR(freereg,ECX);
|
|
||||||
MOVQRtoRm(EDX,freereg);
|
|
||||||
_freeMMXreg(freereg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV32RmtoR(EAX,ECX);
|
|
||||||
MOV32RtoRm(EDX,EAX);
|
|
||||||
|
|
||||||
MOV32RmtoR(EAX,ECX,4);
|
|
||||||
MOV32RtoRm(EDX,EAX,4);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 128:
|
case 128:
|
||||||
if( _hasFreeXMMreg() )
|
iMOV128_SSE(ptr[edx],ptr[ecx]);
|
||||||
{
|
|
||||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
|
||||||
SSE2_MOVDQARmtoR(freereg,ECX);
|
|
||||||
SSE2_MOVDQARtoRm(EDX,freereg);
|
|
||||||
_freeXMMreg(freereg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Could put in an MMX optimization here as well, but no point really.
|
|
||||||
// It's almost never used since there's almost always a free XMM reg.
|
|
||||||
|
|
||||||
MOV128_MtoM( EDX, ECX ); // dest <- src!
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
jNO_DEFAULT
|
jNO_DEFAULT
|
||||||
@ -262,39 +292,11 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
|
|||||||
switch( bits )
|
switch( bits )
|
||||||
{
|
{
|
||||||
case 64:
|
case 64:
|
||||||
if( _hasFreeMMXreg() )
|
iMOV64_Smart(ptr[edx],ptr[ppf]);
|
||||||
{
|
|
||||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
|
||||||
MOVQMtoR(freereg,ppf);
|
|
||||||
MOVQRtoRm(EDX,freereg);
|
|
||||||
_freeMMXreg(freereg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV32MtoR(EAX,ppf);
|
|
||||||
MOV32RtoRm(EDX,EAX);
|
|
||||||
|
|
||||||
MOV32MtoR(EAX,ppf+4);
|
|
||||||
MOV32RtoRm(EDX,EAX,4);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 128:
|
case 128:
|
||||||
if( _hasFreeXMMreg() )
|
iMOV128_SSE(ptr[edx],ptr[ppf]);
|
||||||
{
|
|
||||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
|
||||||
SSE2_MOVDQA_M128_to_XMM( freereg, ppf );
|
|
||||||
SSE2_MOVDQARtoRm(EDX,freereg);
|
|
||||||
_freeXMMreg(freereg);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Could put in an MMX optimization here as well, but no point really.
|
|
||||||
// It's almost never used since there's almost always a free XMM reg.
|
|
||||||
|
|
||||||
MOV32ItoR( ECX, ppf );
|
|
||||||
MOV128_MtoM( EDX, ECX ); // dest <- src!
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
jNO_DEFAULT
|
jNO_DEFAULT
|
||||||
@ -415,40 +417,21 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 64:
|
case 64:
|
||||||
if( _hasFreeMMXreg() )
|
iMOV64_Smart(ptr[ecx],ptr[edx]);
|
||||||
{
|
|
||||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
|
||||||
MOVQRmtoR(freereg,EDX);
|
|
||||||
MOVQRtoRm(ECX,freereg);
|
|
||||||
_freeMMXreg( freereg );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV32RmtoR(EAX,EDX);
|
|
||||||
MOV32RtoRm(ECX,EAX);
|
|
||||||
|
|
||||||
MOV32RmtoR(EAX,EDX,4);
|
|
||||||
MOV32RtoRm(ECX,EAX,4);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 128:
|
case 128:
|
||||||
if( _hasFreeXMMreg() )
|
iMOV128_SSE(ptr[ecx],ptr[edx]);
|
||||||
{
|
|
||||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
|
||||||
SSE2_MOVDQARmtoR(freereg,EDX);
|
|
||||||
SSE2_MOVDQARtoRm(ECX,freereg);
|
|
||||||
_freeXMMreg( freereg );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Could put in an MMX optimization here as well, but no point really.
|
|
||||||
// It's almost never used since there's almost always a free XMM reg.
|
|
||||||
|
|
||||||
MOV128_MtoM( ECX, EDX ); // dest <- src!
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
xSHR( ecx, 4 );
|
||||||
|
|
||||||
|
uptr alloc_base = (uptr)vtlbdata.alloc_base;
|
||||||
|
u8* bits_base = vtlbdata.alloc_bits;
|
||||||
|
bits_base -= (alloc_base>>4)/8; //in bytes
|
||||||
|
|
||||||
|
xBTS( ecx, bits_base );
|
||||||
}
|
}
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
@ -514,39 +497,11 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
|
|||||||
break;
|
break;
|
||||||
|
|
||||||
case 64:
|
case 64:
|
||||||
if( _hasFreeMMXreg() )
|
iMOV64_Smart( ptr[ppf], ptr[edx] );
|
||||||
{
|
|
||||||
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
|
|
||||||
MOVQRmtoR(freereg,EDX);
|
|
||||||
MOVQRtoM(ppf,freereg);
|
|
||||||
_freeMMXreg( freereg );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
MOV32RmtoR(EAX,EDX);
|
|
||||||
MOV32RtoM(ppf,EAX);
|
|
||||||
|
|
||||||
MOV32RmtoR(EAX,EDX,4);
|
|
||||||
MOV32RtoM(ppf+4,EAX);
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case 128:
|
case 128:
|
||||||
if( _hasFreeXMMreg() )
|
iMOV128_SSE( ptr[ppf], ptr[edx] );
|
||||||
{
|
|
||||||
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
|
|
||||||
SSE2_MOVDQARmtoR(freereg,EDX);
|
|
||||||
SSE2_MOVDQA_XMM_to_M128(ppf,freereg);
|
|
||||||
_freeXMMreg( freereg );
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
// Could put in an MMX optimization here as well, but no point really.
|
|
||||||
// It's almost never used since there's almost always a free XMM reg.
|
|
||||||
|
|
||||||
MOV32ItoR( ECX, ppf );
|
|
||||||
MOV128_MtoM( ECX, EDX ); // dest <- src!
|
|
||||||
}
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -571,3 +526,4 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
|
|||||||
CALLFunc( (int)vtlbdata.RWFT[szidx][1][handler] );
|
CALLFunc( (int)vtlbdata.RWFT[szidx][1][handler] );
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -35,6 +35,9 @@
|
|||||||
|
|
||||||
namespace x86Emitter
|
namespace x86Emitter
|
||||||
{
|
{
|
||||||
|
extern void xStoreReg( const xRegisterSSE& src );
|
||||||
|
extern void xRestoreReg( const xRegisterSSE& dest );
|
||||||
|
|
||||||
// ------------------------------------------------------------------------
|
// ------------------------------------------------------------------------
|
||||||
// Group 1 Instruction Class
|
// Group 1 Instruction Class
|
||||||
|
|
||||||
|
@ -677,8 +677,6 @@ extern void CDQE( void );
|
|||||||
extern void LAHF();
|
extern void LAHF();
|
||||||
extern void SAHF();
|
extern void SAHF();
|
||||||
|
|
||||||
extern void BT32ItoR( x86IntRegType to, u8 from );
|
|
||||||
extern void BTR32ItoR( x86IntRegType to, u8 from );
|
|
||||||
extern void BSRRtoR(x86IntRegType to, x86IntRegType from);
|
extern void BSRRtoR(x86IntRegType to, x86IntRegType from);
|
||||||
extern void BSWAP32R( x86IntRegType to );
|
extern void BSWAP32R( x86IntRegType to );
|
||||||
|
|
||||||
|
@ -30,9 +30,22 @@ u8 g_globalXMMSaved = 0;
|
|||||||
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
|
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
|
||||||
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] );
|
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] );
|
||||||
|
|
||||||
|
namespace x86Emitter
|
||||||
|
{
|
||||||
|
void xStoreReg( const xRegisterSSE& src )
|
||||||
|
{
|
||||||
|
xMOVDQA( &g_globalXMMData[src.Id], src );
|
||||||
|
}
|
||||||
|
|
||||||
|
void xRestoreReg( const xRegisterSSE& dest )
|
||||||
|
{
|
||||||
|
xMOVDQA( dest, &g_globalXMMData[dest.Id] );
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/////////////////////////////////////////////////////////////////////
|
/////////////////////////////////////////////////////////////////////
|
||||||
// SetCPUState -- for assugnment of SSE roundmodes and clampmodes.
|
// SetCPUState -- for assignment of SSE roundmodes and clampmodes.
|
||||||
|
|
||||||
u32 g_sseMXCSR = DEFAULT_sseMXCSR;
|
u32 g_sseMXCSR = DEFAULT_sseMXCSR;
|
||||||
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR;
|
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user