Merged drk||Raziel's "BTS Manual Protection" enhancement for the vtlb into /trunk, and combined it with Pseudonim's "Manual Block Clear" enhancement for an ideal two-phase protection system.

Most things should be a bit faster with this new system.  The system is more balanced than the previous one, in that it provides a better overall performance across most games, but some specific FMVs (like Disgaea 2's) will be a bit slower.  On the other hand, others like DQ8 and Kingdom Hearts 2 FMVs get a big speedup.  Almost all in-game stuff should be either the same or faster now.

Set a bunch of ignores for TortoiseSVN users, as suggested in Issue 166.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1083 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
Jake.Stine 2009-04-29 04:24:46 +00:00
commit 31f0be6eb8
15 changed files with 271 additions and 278 deletions

View File

@ -37,7 +37,7 @@ static const uint m_psxMemSize =
void psxMemAlloc() void psxMemAlloc()
{ {
if( m_psxAllMem == NULL ) if( m_psxAllMem == NULL )
m_psxAllMem = vtlb_malloc( m_psxMemSize, 4096, 0x21000000 ); m_psxAllMem = vtlb_malloc( m_psxMemSize, 4096 );
if( m_psxAllMem == NULL) if( m_psxAllMem == NULL)
throw Exception::OutOfMemory( "psxMemAlloc > failed allocating memory for the IOP processor." ); throw Exception::OutOfMemory( "psxMemAlloc > failed allocating memory for the IOP processor." );

View File

@ -618,7 +618,7 @@ static u8* m_psAllMem = NULL;
void memAlloc() void memAlloc()
{ {
if( m_psAllMem == NULL ) if( m_psAllMem == NULL )
m_psAllMem = vtlb_malloc( m_allMemSize, 4096, 0x2400000 ); m_psAllMem = vtlb_malloc( m_allMemSize, 4096 );
if( m_psAllMem == NULL) if( m_psAllMem == NULL)
throw Exception::OutOfMemory( "memAlloc > failed to allocate PS2's base ram/rom/scratchpad." ); throw Exception::OutOfMemory( "memAlloc > failed to allocate PS2's base ram/rom/scratchpad." );

View File

@ -76,6 +76,9 @@ int _SPR0chain()
{ {
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
// Clear dependent EE recompiler blocks, if necessary [needed for BTS protection system]
Cpu->Clear( spr0->madr, spr0->qwc << 2 );
// clear VU mem also! // clear VU mem also!
TestClearVUs(spr0->madr, spr0->qwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes) TestClearVUs(spr0->madr, spr0->qwc << 2); // Wtf is going on here? AFAIK, only VIF should affect VU micromem (cottonvibes)
@ -121,6 +124,7 @@ void _SPR0interleave()
{ {
// clear VU mem also! // clear VU mem also!
TestClearVUs(spr0->madr, spr0->qwc << 2); TestClearVUs(spr0->madr, spr0->qwc << 2);
Cpu->Clear( spr0->madr, spr0->qwc << 2 );
memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4); memcpy_fast((u8*)pMem, &PS2MEM_SCRATCH[spr0->sadr & 0x3fff], spr0->qwc << 4);
} }
spr0->sadr += spr0->qwc * 16; spr0->sadr += spr0->qwc * 16;

View File

@ -168,6 +168,7 @@ bool SysAllocateMem()
try try
{ {
vtlb_Core_Alloc();
memAlloc(); memAlloc();
psxMemAlloc(); psxMemAlloc();
vuMicroMemAlloc(); vuMicroMemAlloc();
@ -271,6 +272,7 @@ void SysShutdownMem()
vuMicroMemShutdown(); vuMicroMemShutdown();
psxMemShutdown(); psxMemShutdown();
memShutdown(); memShutdown();
vtlb_Core_Shutdown();
} }
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////

View File

@ -83,7 +83,7 @@ static const uint m_vuMemSize =
void vuMicroMemAlloc() void vuMicroMemAlloc()
{ {
if( m_vuAllMem == NULL ) if( m_vuAllMem == NULL )
m_vuAllMem = vtlb_malloc( m_vuMemSize, 16, 0x28000000 ); m_vuAllMem = vtlb_malloc( m_vuMemSize, 16 );
if( m_vuAllMem == NULL ) if( m_vuAllMem == NULL )
throw Exception::OutOfMemory( "vuMicroMemInit > Failed to allocate VUmicro memory." ); throw Exception::OutOfMemory( "vuMicroMemInit > Failed to allocate VUmicro memory." );

View File

@ -61,7 +61,6 @@ vtlbHandler UnmappedVirtHandler1;
vtlbHandler UnmappedPhyHandler0; vtlbHandler UnmappedPhyHandler0;
vtlbHandler UnmappedPhyHandler1; vtlbHandler UnmappedPhyHandler1;
/* /*
__asm __asm
{ {
@ -87,10 +86,22 @@ callfunction:
jmp [readfunctions8-0x800000+eax]; jmp [readfunctions8-0x800000+eax];
}*/ }*/
///////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// Interpreter Implementations of VTLB Memory Operations. // Interpreter Implementations of VTLB Memory Operations.
// See recVTLB.cpp for the dynarec versions. // See recVTLB.cpp for the dynarec versions.
// ------------------------------------------------------------------------
// Helper for the BTS manual protection system. Sets a bit based on the given address,
// marking that piece of PS2 memory as 'dirty.'
//
static void memwritebits(u8* ptr)
{
u32 offs=ptr-vtlbdata.alloc_base;	// byte offset of the write into the vtlb arena
offs/=16;	// one dirty bit covers 16 bytes of PS2 memory
vtlbdata.alloc_bits[offs/8] |= 1 << (offs%8);	// set bit (offs%8) of bitmap byte (offs/8)
}
// ------------------------------------------------------------------------
// Interpreted VTLB lookup for 8, 16, and 32 bit accesses // Interpreted VTLB lookup for 8, 16, and 32 bit accesses
template<int DataSize,typename DataType> template<int DataSize,typename DataType>
__forceinline DataType __fastcall MemOp_r0(u32 addr) __forceinline DataType __fastcall MemOp_r0(u32 addr)
@ -117,6 +128,7 @@ __forceinline DataType __fastcall MemOp_r0(u32 addr)
} }
} }
// ------------------------------------------------------------------------
// Interpreted VTLB lookup for 64 and 128 bit accesses. // Interpreted VTLB lookup for 64 and 128 bit accesses.
template<int DataSize,typename DataType> template<int DataSize,typename DataType>
__forceinline void __fastcall MemOp_r1(u32 addr, DataType* data) __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
@ -148,6 +160,7 @@ __forceinline void __fastcall MemOp_r1(u32 addr, DataType* data)
} }
} }
// ------------------------------------------------------------------------
template<int DataSize,typename DataType> template<int DataSize,typename DataType>
__forceinline void __fastcall MemOp_w0(u32 addr, DataType data) __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
{ {
@ -155,6 +168,7 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
s32 ppf=addr+vmv; s32 ppf=addr+vmv;
if (!(ppf<0)) if (!(ppf<0))
{ {
memwritebits((u8*)ppf);
*reinterpret_cast<DataType*>(ppf)=data; *reinterpret_cast<DataType*>(ppf)=data;
} }
else else
@ -174,6 +188,8 @@ __forceinline void __fastcall MemOp_w0(u32 addr, DataType data)
} }
} }
} }
// ------------------------------------------------------------------------
template<int DataSize,typename DataType> template<int DataSize,typename DataType>
__forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data) __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
{ {
@ -182,6 +198,7 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
s32 ppf=addr+vmv; s32 ppf=addr+vmv;
if (!(ppf<0)) if (!(ppf<0))
{ {
memwritebits((u8*)ppf);
*reinterpret_cast<DataType*>(ppf)=*data; *reinterpret_cast<DataType*>(ppf)=*data;
if (DataSize==128) if (DataSize==128)
*reinterpret_cast<DataType*>(ppf+8)=data[1]; *reinterpret_cast<DataType*>(ppf+8)=data[1];
@ -202,7 +219,6 @@ __forceinline void __fastcall MemOp_w1(u32 addr,const DataType* data)
} }
} }
mem8_t __fastcall vtlb_memRead8(u32 mem) mem8_t __fastcall vtlb_memRead8(u32 mem)
{ {
return MemOp_r0<8,mem8_t>(mem); return MemOp_r0<8,mem8_t>(mem);
@ -328,7 +344,7 @@ void __fastcall vtlbDefaultPhyWrite64(u32 addr,const mem64_t* data) { Console::E
void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); } void __fastcall vtlbDefaultPhyWrite128(u32 addr,const mem128_t* data) { Console::Error("vtlbDefaultPhyWrite128: 0x%X",params addr); verify(false); }
///////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
// VTLB Public API -- Init/Term/RegisterHandler stuff // VTLB Public API -- Init/Term/RegisterHandler stuff
// //
@ -361,6 +377,7 @@ vtlbHandler vtlb_RegisterHandler( vtlbMemR8FP* r8,vtlbMemR16FP* r16,vtlbMemR32FP
return rv; return rv;
} }
//////////////////////////////////////////////////////////////////////////////////////////
// Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region. // Maps the given handler (created with vtlb_RegisterHandler) to the specified memory region.
// New mappings always assume priority over previous mappings, so place "generic" mappings for // New mappings always assume priority over previous mappings, so place "generic" mappings for
// large areas of memory first, and then specialize specific small regions of memory afterward. // large areas of memory first, and then specialize specific small regions of memory afterward.
@ -500,7 +517,8 @@ void vtlb_VMapUnmap(u32 vaddr,u32 sz)
} }
} }
// Clears vtlb handlers and memory mappings. //////////////////////////////////////////////////////////////////////////////////////////
// vtlb_init -- Clears vtlb handlers and memory mappings.
void vtlb_Init() void vtlb_Init()
{ {
vtlbHandlerCount=0; vtlbHandlerCount=0;
@ -540,7 +558,8 @@ void vtlb_Init()
vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE); vtlb_VMapUnmap((VTLB_VMAP_ITEMS-1)*VTLB_PAGE_SIZE,VTLB_PAGE_SIZE);
} }
// Performs a COP0-level reset of the PS2's TLB. //////////////////////////////////////////////////////////////////////////////////////////
// vtlb_Reset -- Performs a COP0-level reset of the PS2's TLB.
// This function should probably be part of the COP0 rather than here in VTLB. // This function should probably be part of the COP0 rather than here in VTLB.
void vtlb_Reset() void vtlb_Reset()
{ {
@ -552,30 +571,65 @@ void vtlb_Term()
//nothing to do for now //nothing to do for now
} }
//////////////////////////////////////////////////////////////////////////////////////////
// Reserves the vtlb core allocation used by various emulation components!
//
void vtlb_Core_Alloc()
{
if( vtlbdata.alloc_base != NULL ) return;	// arena already reserved; nothing to do
vtlbdata.alloc_current = 0;	// reset the bump-allocator cursor used by vtlb_malloc
#ifdef __LINUX__
// presumably the 0x80000000 flag asks for a mapping below 2GB so the pointer's
// sign bit stays clear (a vtlb requirement) -- confirm against SysMmapEx.
vtlbdata.alloc_base = SysMmapEx( 0x16000000, VTLB_ALLOC_SIZE, 0x80000000, "Vtlb" );
// NOTE(review): unlike the Win32 path below, a NULL result here is not turned
// into an OutOfMemory exception -- verify SysMmapEx failure handling.
#else
// Win32 just needs this, since malloc always maps below 2GB.
vtlbdata.alloc_base = (u8*)_aligned_malloc( VTLB_ALLOC_SIZE, 4096 );
if( vtlbdata.alloc_base == NULL )
throw Exception::OutOfMemory( "Fatal Error: could not allocate 42Meg buffer for PS2's mappable system ram." );
#endif
}
//////////////////////////////////////////////////////////////////////////////////////////
//
void vtlb_Core_Shutdown()
{
if( vtlbdata.alloc_base == NULL ) return;	// never allocated (or already released)
#ifdef __LINUX__
SafeSysMunmap( vtlbdata.alloc_base, VTLB_ALLOC_SIZE );
#else
// Make sure and unprotect memory first, since CrtDebug will try to write to it.
HostSys::MemProtect( vtlbdata.alloc_base, VTLB_ALLOC_SIZE, Protect_ReadWrite );
safe_aligned_free( vtlbdata.alloc_base );
#endif
// NOTE(review): this relies on SafeSysMunmap/safe_aligned_free nulling out
// alloc_base so a repeat call is a no-op -- verify those helpers clear their arg.
}
//////////////////////////////////////////////////////////////////////////////////////////
// This function allocates memory blocks which are compatible with the Vtlb's requirements // This function allocates memory blocks which are compatible with the Vtlb's requirements
// for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory // for memory locations. The Vtlb requires the topmost bit (Sign bit) of the memory
// pointer to be cleared. Some operating systems and/or implementations of malloc do that, // pointer to be cleared. Some operating systems and/or implementations of malloc do that,
// but others do not. So use this instead to allocate the memory correctly for your // but others do not. So use this instead to allocate the memory correctly for your
// platform. // platform.
u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress ) //
u8* vtlb_malloc( uint size, uint align )
{ {
#ifdef __LINUX__ vtlbdata.alloc_current += align-1;
return SysMmapEx( tryBaseAddress, size, 0x80000000, "Vtlb" ); vtlbdata.alloc_current &= ~(align-1);
#else
// Win32 just needs this, since malloc always maps below 2GB. int rv = vtlbdata.alloc_current;
return (u8*)_aligned_malloc(size, align); vtlbdata.alloc_current += size;
#endif return &vtlbdata.alloc_base[rv];
} }
//////////////////////////////////////////////////////////////////////////////////////////
//
void vtlb_free( void* pmem, uint size ) void vtlb_free( void* pmem, uint size )
{ {
if( pmem == NULL ) return; // Does nothing anymore! Alloc/dealloc is now handled by vtlb_Core_Alloc /
// vtlb_Core_Shutdown. Placebo is left in place in case it becomes useful again
#ifdef __LINUX__ // at a later date.
SafeSysMunmap( pmem, size );
#else return;
// Make sure and unprotect memory first, since CrtDebug will try to write to it.
HostSys::MemProtect( pmem, size, Protect_ReadWrite );
safe_aligned_free( pmem );
#endif
} }

View File

@ -23,10 +23,12 @@ typedef void __fastcall vtlbMemW128FP(u32 addr,const mem128_t* data);
typedef u32 vtlbHandler; typedef u32 vtlbHandler;
extern void vtlb_Core_Alloc();
extern void vtlb_Core_Shutdown();
extern void vtlb_Init(); extern void vtlb_Init();
extern void vtlb_Reset(); extern void vtlb_Reset();
extern void vtlb_Term(); extern void vtlb_Term();
extern u8* vtlb_malloc( uint size, uint align, uptr tryBaseAddress ); extern u8* vtlb_malloc( uint size, uint align );
extern void vtlb_free( void* pmem, uint size ); extern void vtlb_free( void* pmem, uint size );
@ -67,6 +69,8 @@ extern void vtlb_DynGenRead32_Const( u32 bits, bool sign, u32 addr_const );
namespace vtlb_private namespace vtlb_private
{ {
static const uint VTLB_ALLOC_SIZE = 0x2900000; //this is a bit more than required
static const uint VTLB_PAGE_BITS = 12; static const uint VTLB_PAGE_BITS = 12;
static const uint VTLB_PAGE_MASK = 4095; static const uint VTLB_PAGE_MASK = 4095;
static const uint VTLB_PAGE_SIZE = 4096; static const uint VTLB_PAGE_SIZE = 4096;
@ -77,6 +81,11 @@ namespace vtlb_private
struct MapData struct MapData
{ {
u8 alloc_bits[VTLB_ALLOC_SIZE/16/8];
u8* alloc_base; //base of the memory array
int alloc_current; //current base
s32 pmap[VTLB_PMAP_ITEMS]; //512KB s32 pmap[VTLB_PMAP_ITEMS]; //512KB
s32 vmap[VTLB_VMAP_ITEMS]; //4MB s32 vmap[VTLB_VMAP_ITEMS]; //4MB

View File

@ -2883,7 +2883,7 @@
</Filter> </Filter>
</Filter> </Filter>
<Filter <Filter
Name="Dynarec Emitter" Name="x86Emitter"
> >
<File <File
RelativePath="..\..\x86\ix86\ix86.cpp" RelativePath="..\..\x86\ix86\ix86.cpp"

View File

@ -202,10 +202,8 @@ void WinRun()
_doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 ); _doPluginOverride( "DEV9", g_Startup.dev9dll, Config.DEV9 );
#ifndef _DEBUG
if( Config.Profiler ) if( Config.Profiler )
ProfilerInit(); ProfilerInit();
#endif
InitCPUTicks(); InitCPUTicks();
@ -800,7 +798,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
SaveConfig(); SaveConfig();
break; break;
#ifndef _DEBUG
case ID_PROFILER: case ID_PROFILER:
Config.Profiler = !Config.Profiler; Config.Profiler = !Config.Profiler;
if( Config.Profiler ) if( Config.Profiler )
@ -815,7 +812,6 @@ LRESULT WINAPI MainWndProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
} }
SaveConfig(); SaveConfig();
break; break;
#endif
default: default:
if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax)) if (LOWORD(wParam) >= ID_LANGS && LOWORD(wParam) <= (ID_LANGS + langsMax))
@ -989,9 +985,7 @@ void CreateMainMenu() {
ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT); ADDMENUITEM(0,_("Print cdvd &Info"), ID_CDVDPRINT);
ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS); ADDMENUITEM(0,_("Close GS Window on Esc"), ID_CLOSEGS);
ADDSEPARATOR(0); ADDSEPARATOR(0);
#ifndef _DEBUG
ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER); ADDMENUITEM(0,_("Enable &Profiler"), ID_PROFILER);
#endif
ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES); ADDMENUITEM(0,_("Enable &Patches"), ID_PATCHES);
ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE); ADDMENUITEM(0,_("Enable &Console"), ID_CONSOLE);
ADDSEPARATOR(0); ADDSEPARATOR(0);

View File

@ -7,7 +7,8 @@
// //
// Generated from the TEXTINCLUDE 2 resource. // Generated from the TEXTINCLUDE 2 resource.
// //
#include "afxresmw.h" #include "afxresmw.h"
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
#undef APSTUDIO_READONLY_SYMBOLS #undef APSTUDIO_READONLY_SYMBOLS
@ -899,7 +900,8 @@ END
// //
// Generated from the TEXTINCLUDE 3 resource. // Generated from the TEXTINCLUDE 3 resource.
// //
///////////////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////////////
#endif // not APSTUDIO_INVOKED #endif // not APSTUDIO_INVOKED

View File

@ -418,6 +418,9 @@ static void recAlloc()
x86FpuState = FPU_STATE; x86FpuState = FPU_STATE;
} }
PCSX2_ALIGNED16( static u16 manual_page[Ps2MemSize::Base >> 12] );
PCSX2_ALIGNED16( static u8 manual_counter[Ps2MemSize::Base >> 12] );
//////////////////////////////////////////////////// ////////////////////////////////////////////////////
void recResetEE( void ) void recResetEE( void )
{ {
@ -427,6 +430,8 @@ void recResetEE( void )
memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3 memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc ); memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
memzero_obj( manual_page );
memzero_obj( manual_counter );
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc, ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4))); (((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4)));
@ -720,7 +725,6 @@ static void ClearRecLUT(BASEBLOCK* base, int count)
base[i].SetFnptr((uptr)JITCompile); base[i].SetFnptr((uptr)JITCompile);
} }
// Returns the offset to the next instruction after any cleared memory
void recClear(u32 addr, u32 size) void recClear(u32 addr, u32 size)
{ {
BASEBLOCKEX* pexblock; BASEBLOCKEX* pexblock;
@ -1256,14 +1260,16 @@ void badespfn() {
void __fastcall dyna_block_discard(u32 start,u32 sz) void __fastcall dyna_block_discard(u32 start,u32 sz)
{ {
DevCon::WriteLn("dyna_block_discard %08X , count %d", params start,sz); DevCon::WriteLn("dyna_block_discard .. start: %08X count=%d", params start,sz);
Cpu->Clear(start,sz); Cpu->Clear(start, sz);
} }
void __fastcall dyna_block_reset(u32 start,u32 sz)
void __fastcall dyna_page_reset(u32 start,u32 sz)
{ {
DevCon::WriteLn("dyna_block_reset %08X , count %d", params start,sz); DevCon::WriteLn("dyna_page_reset .. start=%08X count=%d", params start,sz);
Cpu->Clear(start & ~0xfffUL, 0x400); Cpu->Clear(start & ~0xfffUL, 0x400);
manual_counter[start >> 10]++;
mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL); mmap_MarkCountedRamPage(PSM(start), start & ~0xfffUL);
} }
@ -1490,98 +1496,6 @@ StartRecomp:
// instruction being analyzed. // instruction being analyzed.
if( usecop2 ) vucycle++; if( usecop2 ) vucycle++;
// peephole optimizations //
#ifdef PCSX2_VM_COISSUE
if( i < s_nEndBlock-4 && recompileCodeSafe(i) ) {
u32 curcode = cpuRegs.code;
u32 nextcode = *(u32*)PSM(i+4);
if( _eeIsLoadStoreCoIssue(curcode, nextcode) && recBSC_co[curcode>>26] != NULL ) {
// rs has to be the same, and cannot be just written
if( ((curcode >> 21) & 0x1F) == ((nextcode >> 21) & 0x1F) && !_eeLoadWritesRs(curcode) ) {
if( _eeIsLoadStoreCoX(curcode) && ((nextcode>>16)&0x1f) != ((curcode>>21)&0x1f) ) {
// see how many stores there are
u32 j;
// use xmmregs since only supporting lwc1,lq,swc1,sq
for(j = i+8; j < s_nEndBlock && j < i+4*iREGCNT_XMM; j += 4 ) {
u32 nncode = *(u32*)PSM(j);
if( (nncode>>26) != (curcode>>26) || ((curcode>>21)&0x1f) != ((nncode>>21)&0x1f) ||
_eeLoadWritesRs(nncode))
break;
}
if( j > i+8 ) {
u32 num = (j-i)>>2; // number of stores that can coissue
assert( num <= iREGCNT_XMM );
g_pCurInstInfo[0].numpeeps = num-1;
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
while(i < j-4) {
g_pCurInstInfo++;
g_pCurInstInfo[0].info |= EEINSTINFO_NOREC;
i += 4;
}
continue;
}
// fall through
}
// unaligned loadstores
// if LWL, check if LWR and that offsets are +3 away
switch(curcode >> 26) {
case 0x22: // LWL
if( (nextcode>>26) != 0x26 || ((s16)nextcode)+3 != (s16)curcode )
continue;
break;
case 0x26: // LWR
if( (nextcode>>26) != 0x22 || ((s16)nextcode) != (s16)curcode+3 )
continue;
break;
case 0x2a: // SWL
if( (nextcode>>26) != 0x2e || ((s16)nextcode)+3 != (s16)curcode )
continue;
break;
case 0x2e: // SWR
if( (nextcode>>26) != 0x2a || ((s16)nextcode) != (s16)curcode+3 )
continue;
break;
case 0x1a: // LDL
if( (nextcode>>26) != 0x1b || ((s16)nextcode)+7 != (s16)curcode )
continue;
break;
case 0x1b: // LWR
if( (nextcode>>26) != 0x1aa || ((s16)nextcode) != (s16)curcode+7 )
continue;
break;
case 0x2c: // SWL
if( (nextcode>>26) != 0x2d || ((s16)nextcode)+7 != (s16)curcode )
continue;
break;
case 0x2d: // SWR
if( (nextcode>>26) != 0x2c || ((s16)nextcode) != (s16)curcode+7 )
continue;
break;
}
// good enough
g_pCurInstInfo[0].info |= EEINSTINFO_COREC;
g_pCurInstInfo[0].numpeeps = 1;
g_pCurInstInfo[1].info |= EEINSTINFO_NOREC;
g_pCurInstInfo++;
i += 4;
continue;
}
}
}
#endif // end peephole
} }
// This *is* important because g_pCurInstInfo is checked a bit later on and // This *is* important because g_pCurInstInfo is checked a bit later on and
// if it's not equal to s_pInstCache it handles recompilation differently. // if it's not equal to s_pInstCache it handles recompilation differently.
@ -1611,7 +1525,6 @@ StartRecomp:
iDumpBlock(startpc, recPtr); iDumpBlock(startpc, recPtr);
#endif #endif
static u16 manual_page[Ps2MemSize::Base >> 12];
u32 sz=(s_nEndBlock-startpc)>>2; u32 sz=(s_nEndBlock-startpc)>>2;
u32 inpage_ptr=HWADDR(startpc); u32 inpage_ptr=HWADDR(startpc);
@ -1631,31 +1544,76 @@ StartRecomp:
} }
else else
{ {
// import the vtlbdata (alloc_bits and alloc_base and stuff):
using namespace vtlb_private;
MOV32ItoR(ECX, inpage_ptr); MOV32ItoR(ECX, inpage_ptr);
MOV32ItoR(EDX, pgsz); MOV32ItoR(EDX, pgsz);
u32 mask=0;
u32 writen=0;
u32 writen_start=0;
u32 lpc=inpage_ptr; u32 lpc=inpage_ptr;
u32 stg=pgsz; u32 stg=pgsz;
while(stg>0) while(stg>0)
{ {
// was dyna_block_discard_recmem. See note in recResetEE for details. u32 bit = (lpc>>4) & 7;
CMP32ItoM((uptr)PSM(lpc),*(u32*)PSM(lpc)); if (mask==0)
JNE32(((u32)&dyna_block_discard)- ( (u32)x86Ptr + 6 )); {
//writen=bit;
writen_start=(((u8*)PSM(lpc)-vtlbdata.alloc_base)>>4)/8;
}
mask |= 1 << bit;
stg-=4; if (bit==31)
lpc+=4; {
vtlbdata.alloc_bits[writen_start]&=~mask;
xTEST( ptr32[&vtlbdata.alloc_bits[writen_start]], mask ); // auto-optimizes to imm8 when applicable.
xJNZ( dyna_block_discard );
//SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
mask = 0;
}
//writen++;
if (stg<=16)
{
lpc += stg;
stg = 0;
}
else
{
lpc += 16;
stg -= 16;
}
} }
if (startpc != 0x81fc0) {
if (mask)
{
vtlbdata.alloc_bits[writen_start] &= ~mask;
xTEST( ptr32[&vtlbdata.alloc_bits[writen_start]], mask ); // auto-optimizes to imm8 when applicable.
xJNZ( dyna_block_discard );
//SysPrintf("%08X %d %d\n",mask,pgsz,pgsz>>4);
mask = 0;
}
if( startpc != 0x81fc0 && manual_counter[inpage_ptr >> 12] <= 4 )
{
// Commented out until we replace it with a smarter algo that only
// recompiles blocks a limited number of times.
xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1); xADD(ptr16[&manual_page[inpage_ptr >> 12]], 1);
xJC( dyna_block_reset ); xJC( dyna_page_reset );
} }
DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params DbgCon::WriteLn("Manual block @ %08X : %08X %d %d %d %d", params
startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4); startpc,inpage_ptr,pgsz,0x1000-inpage_offs,inpage_sz,sz*4);
} }
} }
inpage_ptr+=pgsz; inpage_ptr += pgsz;
inpage_sz-=pgsz; inpage_sz -= pgsz;
} }
// finally recompile // // finally recompile //

View File

@ -23,31 +23,88 @@
#include "iCore.h" #include "iCore.h"
#include "iR5900.h" #include "iR5900.h"
#include "ix86\ix86_internal.h"
using namespace vtlb_private; using namespace vtlb_private;
using namespace x86Emitter; using namespace x86Emitter;
// NOTICE: This function *destroys* EAX!! //////////////////////////////////////////////////////////////////////////////////////////
// Moves 128 bits of memory from the source register ptr to the dest register ptr. // iAllocRegSSE -- allocates an xmm register. If no xmm register is available, xmm0 is
// (used as an equivalent to movaps, when a free XMM register is unavailable for some reason) // saved into g_globalXMMData and returned as a free register.
void MOV128_MtoM( x86IntRegType destRm, x86IntRegType srcRm ) //
class iAllocRegSSE
{ {
// (this is one of my test cases for the new emitter --air) protected:
xRegisterSSE m_reg;
bool m_free;
xAddressReg src( srcRm ); public:
xAddressReg dest( destRm ); iAllocRegSSE() :
m_reg( xmm0 ),
m_free( !!_hasFreeXMMreg() )
{
if( m_free )
m_reg = xRegisterSSE( _allocTempXMMreg( XMMT_INT, -1 ) );
else
xStoreReg( m_reg );
}
xMOV( eax, ptr[src] ); ~iAllocRegSSE()
xMOV( ptr[dest], eax ); {
if( m_free )
_freeXMMreg( m_reg.Id );
else
xRestoreReg( m_reg );
}
operator xRegisterSSE() const { return m_reg; }
};
xMOV( eax, ptr[src+4] ); //////////////////////////////////////////////////////////////////////////////////////////
xMOV( ptr[dest+4], eax ); // Moves 128 bits from point B to point A, using SSE's MOVAPS (or MOVDQA).
// This instruction always uses an SSE register, even if all registers are allocated! It
// saves an SSE register to memory first, performs the copy, and restores the register.
//
void iMOV128_SSE( const ModSibBase& destRm, const ModSibBase& srcRm )
{
iAllocRegSSE reg;
xMOVDQA( reg, srcRm );
xMOVDQA( destRm, reg );
}
xMOV( eax, ptr[src+8] ); //////////////////////////////////////////////////////////////////////////////////////////
xMOV( ptr[dest+8], eax ); // Moves 64 bits of data from point B to point A, using either MMX, SSE, or x86 registers
// if neither MMX nor SSE is available to the task.
//
// Optimizations: This method uses MMX if the cpu is in MMX mode, or SSE if it's in FPU
// mode (saving on potential EMMS uses).
//
void iMOV64_Smart( const ModSibBase& destRm, const ModSibBase& srcRm )
{
if( (x86FpuState == FPU_STATE) && _hasFreeXMMreg() )
{
// Move things using MOVLPS:
xRegisterSSE reg( _allocTempXMMreg( XMMT_INT, -1 ) );
xMOVL.PS( reg, srcRm );
xMOVL.PS( destRm, reg );
_freeXMMreg( reg.Id );
return;
}
xMOV( eax, ptr[src+12] ); if( _hasFreeMMXreg() )
xMOV( ptr[dest+12], eax ); {
xRegisterMMX reg( _allocMMXreg(-1, MMX_TEMP, 0) );
xMOVQ( reg, srcRm );
xMOVQ( destRm, reg );
_freeMMXreg( reg.Id );
}
else
{
xMOV( eax, srcRm );
xMOV( destRm, eax );
xMOV( eax, srcRm+4 );
xMOV( destRm+4, eax );
}
} }
/* /*
@ -127,38 +184,11 @@ static void _vtlb_DynGen_DirectRead( u32 bits, bool sign )
break; break;
case 64: case 64:
if( _hasFreeMMXreg() ) iMOV64_Smart(ptr[edx],ptr[ecx]);
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoR(freereg,ECX);
MOVQRtoRm(EDX,freereg);
_freeMMXreg(freereg);
}
else
{
MOV32RmtoR(EAX,ECX);
MOV32RtoRm(EDX,EAX);
MOV32RmtoR(EAX,ECX,4);
MOV32RtoRm(EDX,EAX,4);
}
break; break;
case 128: case 128:
if( _hasFreeXMMreg() ) iMOV128_SSE(ptr[edx],ptr[ecx]);
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoR(freereg,ECX);
SSE2_MOVDQARtoRm(EDX,freereg);
_freeXMMreg(freereg);
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV128_MtoM( EDX, ECX ); // dest <- src!
}
break; break;
jNO_DEFAULT jNO_DEFAULT
@ -262,39 +292,11 @@ void vtlb_DynGenRead64_Const( u32 bits, u32 addr_const )
switch( bits ) switch( bits )
{ {
case 64: case 64:
if( _hasFreeMMXreg() ) iMOV64_Smart(ptr[edx],ptr[ppf]);
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQMtoR(freereg,ppf);
MOVQRtoRm(EDX,freereg);
_freeMMXreg(freereg);
}
else
{
MOV32MtoR(EAX,ppf);
MOV32RtoRm(EDX,EAX);
MOV32MtoR(EAX,ppf+4);
MOV32RtoRm(EDX,EAX,4);
}
break; break;
case 128: case 128:
if( _hasFreeXMMreg() ) iMOV128_SSE(ptr[edx],ptr[ppf]);
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQA_M128_to_XMM( freereg, ppf );
SSE2_MOVDQARtoRm(EDX,freereg);
_freeXMMreg(freereg);
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV32ItoR( ECX, ppf );
MOV128_MtoM( EDX, ECX ); // dest <- src!
}
break; break;
jNO_DEFAULT jNO_DEFAULT
@ -415,40 +417,21 @@ static void _vtlb_DynGen_DirectWrite( u32 bits )
break; break;
case 64: case 64:
if( _hasFreeMMXreg() ) iMOV64_Smart(ptr[ecx],ptr[edx]);
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoR(freereg,EDX);
MOVQRtoRm(ECX,freereg);
_freeMMXreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoRm(ECX,EAX);
MOV32RmtoR(EAX,EDX,4);
MOV32RtoRm(ECX,EAX,4);
}
break; break;
case 128: case 128:
if( _hasFreeXMMreg() ) iMOV128_SSE(ptr[ecx],ptr[edx]);
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoR(freereg,EDX);
SSE2_MOVDQARtoRm(ECX,freereg);
_freeXMMreg( freereg );
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV128_MtoM( ECX, EDX ); // dest <- src!
}
break; break;
} }
xSHR( ecx, 4 );
uptr alloc_base = (uptr)vtlbdata.alloc_base;
u8* bits_base = vtlbdata.alloc_bits;
bits_base -= (alloc_base>>4)/8; //in bytes
xBTS( ecx, bits_base );
} }
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
@ -514,39 +497,11 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
break; break;
case 64: case 64:
if( _hasFreeMMXreg() ) iMOV64_Smart( ptr[ppf], ptr[edx] );
{
const int freereg = _allocMMXreg(-1, MMX_TEMP, 0);
MOVQRmtoR(freereg,EDX);
MOVQRtoM(ppf,freereg);
_freeMMXreg( freereg );
}
else
{
MOV32RmtoR(EAX,EDX);
MOV32RtoM(ppf,EAX);
MOV32RmtoR(EAX,EDX,4);
MOV32RtoM(ppf+4,EAX);
}
break; break;
case 128: case 128:
if( _hasFreeXMMreg() ) iMOV128_SSE( ptr[ppf], ptr[edx] );
{
const int freereg = _allocTempXMMreg( XMMT_INT, -1 );
SSE2_MOVDQARmtoR(freereg,EDX);
SSE2_MOVDQA_XMM_to_M128(ppf,freereg);
_freeXMMreg( freereg );
}
else
{
// Could put in an MMX optimization here as well, but no point really.
// It's almost never used since there's almost always a free XMM reg.
MOV32ItoR( ECX, ppf );
MOV128_MtoM( ECX, EDX ); // dest <- src!
}
break; break;
} }
@ -571,3 +526,4 @@ void vtlb_DynGenWrite_Const( u32 bits, u32 addr_const )
CALLFunc( (int)vtlbdata.RWFT[szidx][1][handler] ); CALLFunc( (int)vtlbdata.RWFT[szidx][1][handler] );
} }
} }

View File

@ -35,6 +35,9 @@
namespace x86Emitter namespace x86Emitter
{ {
extern void xStoreReg( const xRegisterSSE& src );
extern void xRestoreReg( const xRegisterSSE& dest );
// ------------------------------------------------------------------------ // ------------------------------------------------------------------------
// Group 1 Instruction Class // Group 1 Instruction Class

View File

@ -677,8 +677,6 @@ extern void CDQE( void );
extern void LAHF(); extern void LAHF();
extern void SAHF(); extern void SAHF();
extern void BT32ItoR( x86IntRegType to, u8 from );
extern void BTR32ItoR( x86IntRegType to, u8 from );
extern void BSRRtoR(x86IntRegType to, x86IntRegType from); extern void BSRRtoR(x86IntRegType to, x86IntRegType from);
extern void BSWAP32R( x86IntRegType to ); extern void BSWAP32R( x86IntRegType to );

View File

@ -30,9 +30,22 @@ u8 g_globalXMMSaved = 0;
PCSX2_ALIGNED16( static u64 g_globalMMXData[8] ); PCSX2_ALIGNED16( static u64 g_globalMMXData[8] );
PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] ); PCSX2_ALIGNED16( static u64 g_globalXMMData[2*iREGCNT_XMM] );
namespace x86Emitter
{
	// Saves the given xmm register into its save slot in g_globalXMMData.
	//
	// g_globalXMMData is declared as u64[2*iREGCNT_XMM]: two u64 entries (16 bytes)
	// per xmm register.  The slot index must therefore be scaled by 2 -- indexing by
	// src.Id alone spaces the saves only 8 bytes apart, so the 16-byte MOVDQA of
	// register N would overlap register N+1's slot, and odd Ids would produce an
	// 8-byte-aligned operand (MOVDQA requires 16-byte alignment).
	void xStoreReg( const xRegisterSSE& src )
	{
		xMOVDQA( &g_globalXMMData[src.Id*2], src );
	}

	// Restores the given xmm register from its g_globalXMMData slot (see xStoreReg
	// for the slot layout).
	void xRestoreReg( const xRegisterSSE& dest )
	{
		xMOVDQA( dest, &g_globalXMMData[dest.Id*2] );
	}
}
///////////////////////////////////////////////////////////////////// /////////////////////////////////////////////////////////////////////
// SetCPUState -- for assugnment of SSE roundmodes and clampmodes. // SetCPUState -- for assignment of SSE roundmodes and clampmodes.
u32 g_sseMXCSR = DEFAULT_sseMXCSR; u32 g_sseMXCSR = DEFAULT_sseMXCSR;
u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR; u32 g_sseVUMXCSR = DEFAULT_sseVUMXCSR;