Made the EE recompiler's 64-bit constant buffer (previously called a "stack" despite not being used as one) reuse recent constants rather than duplicating them for every instance, resulting in fewer recompiler resets (e.g. espgaluda was resetting every couple of seconds).

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@1509 96395faa-99c1-11dd-bbfe-3dabce05a288
sudonim1 2009-07-14 21:33:38 +00:00
parent 633d83a2c4
commit 5bd531e212
6 changed files with 77 additions and 112 deletions
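
As a rough sketch of the reuse strategy described in the message above (the standalone framing and names here are mine, not PCSX2 code): a small direct-mapped cache keyed on the constant's low word lets repeated constants share one buffer slot instead of appending a duplicate pair every time.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical standalone sketch: a flat buffer of 64-bit constants stored as
    // u32 pairs, plus a direct-mapped cache so recently used constants are reused.
    static const int BUF_U32S = 16384 * 2;
    static uint32_t buf[BUF_U32S];
    static uint32_t* bufPtr = buf;
    static uint32_t* cache[509];            // indexed by the constant's low word

    static uint32_t* getImm64(uint32_t hi, uint32_t lo)
    {
        uint32_t*& slot = cache[lo % 509];
        if (slot && slot[0] == lo && slot[1] == hi)
            return slot;                    // reuse the existing pair
        if (bufPtr >= buf + BUF_U32S)
            return buf;                     // full; the real code has a fallback and resets later
        slot = bufPtr;
        bufPtr += 2;
        slot[0] = lo;
        slot[1] = hi;
        return slot;
    }

    int main()
    {
        uint32_t* a = getImm64(0, 0x80000000u);
        uint32_t* b = getImm64(0, 0x80000000u);
        std::printf("reused: %s\n", a == b ? "yes" : "no");   // prints "yes"
    }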

View File

@@ -140,7 +140,7 @@ void _eeOnWriteReg(int reg, int signext);
void _deleteEEreg(int reg, int flush);
// allocates memory on the instruction size and returns the pointer
-u32* recAllocStackMem(int size, int align);
+u32* recGetImm64(u32 hi, u32 lo);
void _vuRegsCOP22(VURegs * VU, _VURegsNum *VUregsn);

View File

@@ -75,17 +75,18 @@ u32 g_cpuHasConstReg = 0, g_cpuFlushedConstReg = 0;
// Static Private Variables - R5900 Dynarec
#define X86
-static const int RECSTACK_SIZE = 0x00020000;
+static const int RECCONSTBUF_SIZE = 16384 * 2; // 64 bit consts in 32 bit units
static u8 *recMem = NULL; // the recompiled blocks will be here
-static u8* recStack = NULL; // stack mem
+static u32* recConstBuf = NULL; // 64-bit pseudo-immediates
static BASEBLOCK *recRAM = NULL; // and the ptr to the blocks here
static BASEBLOCK *recROM = NULL; // and here
static BASEBLOCK *recROM1 = NULL; // also here
static u32 *recRAMCopy = NULL;
void JITCompile();
static BaseBlocks recBlocks((uptr)JITCompile);
-static u8* recPtr = NULL, *recStackPtr = NULL;
+static u8* recPtr = NULL;
+static u32 *recConstBufPtr = NULL;
EEINST* s_pInstCache = NULL;
static u32 s_nInstCacheSize = 0;
@@ -209,13 +210,8 @@ u32* _eeGetConstReg(int reg)
return &cpuRegs.GPR.r[ reg ].UL[0];
// if written in the future, don't flush
-if( _recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, reg) ) {
-u32* ptempmem;
-ptempmem = recAllocStackMem(8, 4);
-ptempmem[0] = g_cpuConstRegs[ reg ].UL[0];
-ptempmem[1] = g_cpuConstRegs[ reg ].UL[1];
-return ptempmem;
-}
+if( _recIsRegWritten(g_pCurInstInfo+1, (s_nEndBlock-pc)/4, XMMTYPE_GPRREG, reg) )
+return recGetImm64(g_cpuConstRegs[reg].UL[1], g_cpuConstRegs[reg].UL[0]);
_flushConstReg(reg);
return &cpuRegs.GPR.r[ reg ].UL[0];
@@ -341,19 +337,44 @@ int _flushUnusedConstReg()
return 0;
}
-// ------------------------------------------------------------------------
-// recAllocStackMem -- an optimization trick to write data to a location so that
-// recompiled code can reference it later on during execution.
-//
-// Intended use is for setting up 64/128 bit SSE immediates, primarily.
-//
-u32* recAllocStackMem(int size, int align)
+// Some of the generated MMX code needs 64-bit immediates but x86 doesn't
+// provide this. One of the reasons we are probably better off not doing
+// MMX register allocation for the EE.
+u32* recGetImm64(u32 hi, u32 lo)
{
-jASSUME( align == 4 || align == 8 || align == 16 );
+u32 *imm64; // returned pointer
+static u32 *imm64_cache[509];
+int cacheidx = lo % (sizeof imm64_cache / sizeof *imm64_cache);
+//static int count; count++;
-recStackPtr = (u8*) ( (((uptr)recStackPtr) + (align-1)) & ~(align-1) );
-recStackPtr += size;
-return (u32*)(recStackPtr-size);
+imm64 = imm64_cache[cacheidx];
+if (imm64 && imm64[0] == lo && imm64[1] == hi)
+return imm64;
+if (recConstBufPtr >= recConstBuf + RECCONSTBUF_SIZE) {
+// TODO: flag an error in recompilation which would reset the recompiler
+// immediately and recompile the current block again. There is currently
+// no way to do this, so have a last ditch attempt at making things sane
+// and return some nonsense if that fails.
+for (u32 *p = recConstBuf; p < recConstBuf + RECCONSTBUF_SIZE; p += 2)
+if (p[0] == lo && p[1] == hi) {
+imm64_cache[cacheidx] = p;
+return p;
+}
+return recConstBuf;
+}
+imm64 = recConstBufPtr;
+recConstBufPtr += 2;
+imm64_cache[cacheidx] = imm64;
+imm64[0] = lo;
+imm64[1] = hi;
+//Console::Notice("Consts allocated: %d of %u", params (recConstBufPtr - recConstBuf) / 2, ++count);
+return imm64;
}
//////////////////////////////////////////////////////////////////////////////////////////
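
For orientation before the call-site hunks below: callers encode a 32-bit immediate into the (hi, lo) pair by either zero-extending it (logical immediates) or sign-extending it (arithmetic immediates) into the upper word. A hedged illustration of the two encodings (helper names are mine, not part of the commit):

    #include <cassert>
    #include <cstdint>

    // Hypothetical helpers mirroring the two encodings used at the call sites:
    // recGetImm64(0, _ImmU_) zero-extends, recGetImm64(-(_Imm_ < 0), _Imm_) sign-extends.
    static uint32_t hiZero(uint32_t)   { return 0; }
    static uint32_t hiSign(int32_t lo) { return lo < 0 ? 0xffffffffu : 0u; }

    int main()
    {
        assert(hiSign(-1) == 0xffffffffu);  // negative DADDI immediate -> upper word all ones
        assert(hiSign(16) == 0u);
        assert(hiZero(0xffffu) == 0u);      // ANDI/ORI/XORI immediates stay zero-extended
    }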
@@ -366,7 +387,7 @@ static u8* m_recBlockAlloc = NULL;
static const uint m_recBlockAllocSize =
(((Ps2MemSize::Base + Ps2MemSize::Rom + Ps2MemSize::Rom1) / 4) * sizeof(BASEBLOCK))
-+ RECSTACK_SIZE + Ps2MemSize::Base;
++ RECCONSTBUF_SIZE * sizeof(u32) + Ps2MemSize::Base;
static void recAlloc()
{
@@ -408,7 +429,7 @@ static void recAlloc()
recRAM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Base / 4) * sizeof(BASEBLOCK);
recROM = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom / 4) * sizeof(BASEBLOCK);
recROM1 = (BASEBLOCK*)curpos; curpos += (Ps2MemSize::Rom1 / 4) * sizeof(BASEBLOCK);
-recStack = (u8*)curpos; curpos += RECSTACK_SIZE;
+recConstBuf = (u32*)curpos; curpos += RECCONSTBUF_SIZE * sizeof(u32);
recRAMCopy = (u32*)curpos;
if( s_pInstCache == NULL )
@@ -439,6 +460,7 @@ void recResetEE( void )
memset_8<0xcc, REC_CACHEMEM>(recMem); // 0xcc is INT3
memzero_ptr<m_recBlockAllocSize>( m_recBlockAlloc );
+memzero_ptr<RECCONSTBUF_SIZE * sizeof(u32)>(recConstBuf);
memzero_obj( manual_page );
memzero_obj( manual_counter );
ClearRecLUT((BASEBLOCK*)m_recBlockAlloc,
@@ -490,7 +512,7 @@ void recResetEE( void )
x86SetPtr(recMem);
recPtr = recMem;
-recStackPtr = recStack;
+recConstBufPtr = recConstBuf;
x86FpuState = FPU_STATE;
branch = 0;
@@ -505,7 +527,7 @@ static void recShutdown( void )
SafeSysMunmap( recMem, REC_CACHEMEM );
safe_aligned_free( m_recBlockAlloc );
recRAM = recROM = recROM1 = NULL;
-recStack = NULL;
+recConstBuf = NULL;
recRAMCopy = NULL;
safe_free( s_pInstCache );
@@ -1247,7 +1269,7 @@ void recRecompile( const u32 startpc )
if ( ( (uptr)recPtr - (uptr)recMem ) >= REC_CACHEMEM-0x40000 || dumplog == 0xffffffff) {
recResetEE();
}
-if ( ( (uptr)recStackPtr - (uptr)recStack ) >= RECSTACK_SIZE-0x100 ) {
+if ( (recConstBufPtr - recConstBuf) >= RECCONSTBUF_SIZE - 64 ) {
DevCon::WriteLn("EE recompiler stack reset");
recResetEE();
}
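
For scale, the numbers behind the new threshold above: each 64-bit constant occupies two u32 slots, so the buffer holds 16384 constants in total, and the check trips once fewer than 32 free constants (64 u32 units) remain, presumably to leave headroom for the block about to be compiled.

    #include <cstdio>

    // Worked numbers for the reset threshold (values taken from this commit).
    int main()
    {
        const int RECCONSTBUF_SIZE = 16384 * 2;             // buffer size in u32 units
        const int totalConsts      = RECCONSTBUF_SIZE / 2;  // 16384 distinct 64-bit constants
        const int headroomConsts   = 64 / 2;                // 32 constants of slack before a reset
        std::printf("%d constants total, reset when fewer than %d remain\n",
                    totalConsts, headroomConsts);
    }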
@@ -1646,7 +1668,7 @@ StartRecomp:
}
assert( x86Ptr < recMem+REC_CACHEMEM );
-assert( recStackPtr < recStack+RECSTACK_SIZE );
+assert( recConstBufPtr < recConstBuf + RECCONSTBUF_SIZE );
assert( x86FpuState == 0 );
assert(x86Ptr - recPtr < 0x10000);

View File

@@ -1306,10 +1306,6 @@ void recSLTs_consts(int info, int sign)
PSRLQItoR(EEREC_D, 63);
}
else {
-u32* ptempmem = recAllocStackMem(8,4);
-ptempmem[0] = g_cpuConstRegs[_Rs_].UL[0]^0x80000000;
-ptempmem[1] = 0;
if( EEREC_D != EEREC_T ) {
MOVDMtoMMX(EEREC_D, (u32)&s_sltconst);
PXORRtoR(EEREC_D, EEREC_T);
@@ -1318,7 +1314,7 @@ void recSLTs_consts(int info, int sign)
PXORMtoR(EEREC_D, (u32)&s_sltconst);
}
-PCMPGTDMtoR(EEREC_D, (u32)ptempmem);
+PCMPGTDMtoR(EEREC_D, (uptr)recGetImm64(0, g_cpuConstRegs[_Rs_].UL[0] ^ 0x80000000));
PUNPCKLDQRtoR(EEREC_D, EEREC_D);
PSRLQItoR(EEREC_D, 63);
@@ -1439,11 +1435,7 @@ void recSLTs_constt(int info, int sign)
recSLTmemconstt(EEREC_D, EEREC_S, (u32)_eeGetConstReg(_Rt_), 1);
}
else {
-u32* ptempmem = recAllocStackMem(8,4);
-ptempmem[0] = g_cpuConstRegs[_Rt_].UL[0]^0x80000000;
-ptempmem[1] = 0;
-recSLTmemconstt(EEREC_D, EEREC_S, (u32)ptempmem, 0);
+recSLTmemconstt(EEREC_D, EEREC_S, (uptr)recGetImm64(0, g_cpuConstRegs[_Rt_].UL[0] ^ 0x80000000), 0);
}
return;
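
A hedged aside on the ^0x80000000 pattern in these SLT hunks: PCMPGTD only performs signed comparisons, so flipping the sign bit of both operands maps unsigned order onto signed order, which is what makes the trick valid. A small standalone check (not PCSX2 code):

    #include <cassert>
    #include <cstdint>

    // a < b (unsigned) exactly when (a ^ 0x80000000) < (b ^ 0x80000000) as signed values.
    static bool unsignedLessViaSigned(uint32_t a, uint32_t b)
    {
        return (int32_t)(a ^ 0x80000000u) < (int32_t)(b ^ 0x80000000u);
    }

    int main()
    {
        assert(unsignedLessViaSigned(1u, 0xffffffffu) == (1u < 0xffffffffu));
        assert(unsignedLessViaSigned(0x80000000u, 5u) == (0x80000000u < 5u));
        assert(unsignedLessViaSigned(7u, 7u) == false);
    }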

View File

@@ -63,13 +63,8 @@ void recADDI_(int info)
if ( info & PROCESS_EE_MMX ) {
if ( _Imm_ != 0 ) {
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = (s32)_Imm_;
-ptempmem[1] = 0;
if ( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S);
-PADDDMtoR(EEREC_T, (u32)ptempmem);
+PADDDMtoR(EEREC_T, (uptr)recGetImm64(0, _Imm_));
if ( EEINST_ISLIVE1(_Rt_) ) _signExtendGPRtoMMX(EEREC_T, _Rt_, 0);
else EEINST_RESETHASLIVE1(_Rt_);
}
@@ -89,12 +84,8 @@ void recADDI_(int info)
SetMMXstate();
if ( _Imm_ != 0 ) {
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = (s32)_Imm_;
-ptempmem[1] = 0;
MOVDMtoMMX(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
-PADDDMtoR(rtreg, (u32)ptempmem);
+PADDDMtoR(rtreg, (uptr)recGetImm64(0, _Imm_));
if ( EEINST_ISLIVE1(_Rt_) ) _signExtendGPRtoMMX(rtreg, _Rt_, 0);
else EEINST_RESETHASLIVE1(_Rt_);
@@ -162,13 +153,8 @@ void recDADDI_(int info)
if( info & PROCESS_EE_MMX ) {
if( _Imm_ != 0 ) {
// flush
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = _Imm_;
-ptempmem[1] = _Imm_ >= 0 ? 0 : 0xffffffff;
if( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S);
-PADDQMtoR(EEREC_T, (u32)ptempmem);
+PADDQMtoR(EEREC_T, (uptr)recGetImm64(-(_Imm_ < 0), _Imm_));
}
else {
if( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S);
@@ -178,15 +164,12 @@ void recDADDI_(int info)
if( (g_pCurInstInfo->regs[_Rt_]&EEINST_MMX) ) {
int rtreg;
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = _Imm_;
-ptempmem[1] = _Imm_ >= 0 ? 0 : 0xffffffff;
rtreg = _allocMMXreg(-1, MMX_GPR+_Rt_, MODE_WRITE);
SetMMXstate();
MOVQMtoR(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
-PADDQMtoR(rtreg, (u32)ptempmem);
+PADDQMtoR(rtreg, (uptr)recGetImm64(-(_Imm_ < 0), _Imm_));
}
else {
if( _Rt_ == _Rs_ ) {
@@ -239,10 +222,7 @@ void recSLTIU_(int info)
{
if( info & PROCESS_EE_MMX ) {
if( EEINST_ISSIGNEXT(_Rs_) ) {
-u32* ptempmem = recAllocStackMem(8,4);
-ptempmem[0] = ((s32)(_Imm_))^0x80000000;
-ptempmem[1] = 0;
-recSLTmemconstt(EEREC_T, EEREC_S, (u32)ptempmem, 0);
+recSLTmemconstt(EEREC_T, EEREC_S, (uptr)recGetImm64(0, ((s32)_Imm_)^0x80000000), 0);
EEINST_SETSIGNEXT(_Rt_);
return;
}
@@ -292,10 +272,7 @@ void recSLTI_(int info)
if( info & PROCESS_EE_MMX) {
if( EEINST_ISSIGNEXT(_Rs_) ) {
-u32* ptempmem = recAllocStackMem(8,4);
-ptempmem[0] = _Imm_;
-ptempmem[1] = 0;
-recSLTmemconstt(EEREC_T, EEREC_S, (u32)ptempmem, 1);
+recSLTmemconstt(EEREC_T, EEREC_S, (uptr)recGetImm64(0, _Imm_), 1);
EEINST_SETSIGNEXT(_Rt_);
return;
}
@@ -347,12 +324,8 @@ void recLogicalOpI(int info, int op)
SetMMXstate();
if( _ImmU_ != 0 ) {
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = _ImmU_;
-ptempmem[1] = 0;
if( EEREC_T != EEREC_S ) MOVQRtoR(EEREC_T, EEREC_S);
-LogicalOpMtoR(EEREC_T, (u32)ptempmem, op);
+LogicalOpMtoR(EEREC_T, (uptr)recGetImm64(0, _ImmU_), op);
}
else {
if( op == 0 ) PXORRtoR(EEREC_T, EEREC_T);
@@ -367,21 +340,15 @@ void recLogicalOpI(int info, int op)
if( op == 0 ) {
if ( _ImmU_ != 0 ) {
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = _ImmU_;
-ptempmem[1] = 0;
MOVDMtoMMX(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
-PANDMtoR(rtreg, (u32)ptempmem);
+PANDMtoR(rtreg, (uptr)recGetImm64(0, _ImmU_));
}
else PXORRtoR(rtreg, rtreg);
}
else {
MOVQMtoR(rtreg, (int)&cpuRegs.GPR.r[ _Rs_ ].UL[ 0 ]);
if ( _ImmU_ != 0 ) {
-u32* ptempmem = recAllocStackMem(8, 8);
-ptempmem[0] = _ImmU_;
-ptempmem[1] = 0;
-LogicalOpMtoR(rtreg, (u32)ptempmem, op);
+LogicalOpMtoR(rtreg, (uptr)recGetImm64(0, _ImmU_), op);
}
}
}

View File

@@ -442,15 +442,12 @@ void recMOVZtemp_consts(int info)
CMP8ItoR(EAX, 0xff);
j8Ptr[ 0 ] = JNE8( 0 );
-if( g_cpuFlushedConstReg & (1<<_Rs_) ) mem = &cpuRegs.GPR.r[_Rs_].UL[0];
-else {
-mem = recAllocStackMem(8,8);
+if( g_cpuFlushedConstReg & (1<<_Rs_) )
+mem = &cpuRegs.GPR.r[_Rs_].UL[0];
+else
+mem = _eeGetConstReg(_Rs_);
-mem[0] = g_cpuConstRegs[_Rs_].UL[0];
-mem[1] = g_cpuConstRegs[_Rs_].UL[1];
-}
-MOVQMtoR(EEREC_D, (u32)mem);
+MOVQMtoR(EEREC_D, (uptr)mem);
x86SetJ8( j8Ptr[ 0 ] );
_freeMMXreg(t0reg);
@@ -566,15 +563,12 @@ void recMOVNtemp_consts(int info)
CMP8ItoR(EAX, 0xff);
j8Ptr[ 0 ] = JE8( 0 );
-if( g_cpuFlushedConstReg & (1<<_Rs_) ) mem = &cpuRegs.GPR.r[_Rs_].UL[0];
-else {
-mem = recAllocStackMem(8,8);
+if( g_cpuFlushedConstReg & (1<<_Rs_) )
+mem = &cpuRegs.GPR.r[_Rs_].UL[0];
+else
+mem = _eeGetConstReg(_Rs_);
-mem[0] = g_cpuConstRegs[_Rs_].UL[0];
-mem[1] = g_cpuConstRegs[_Rs_].UL[1];
-}
-MOVQMtoR(EEREC_D, (u32)mem);
+MOVQMtoR(EEREC_D, (uptr)mem);
x86SetJ8( j8Ptr[ 0 ] );
_freeMMXreg(t0reg);

View File

@@ -266,20 +266,15 @@ void recWritebackConstHILO(u64 res, int writed, int upper)
if( g_pCurInstInfo->regs[XMMGPR_LO] & testlive ) {
if( !upper && (reglo = _allocCheckGPRtoMMX(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE)) >= 0 ) {
-u32* ptr = recAllocStackMem(8, 8);
-ptr[0] = res & 0xffffffff;
-ptr[1] = (res&0x80000000)?0xffffffff:0;
-MOVQMtoR(reglo, (u32)ptr);
+MOVQMtoR(reglo, (uptr)recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res));
}
else {
reglo = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_LO, MODE_WRITE|MODE_READ);
if( reglo >= 0 ) {
-u32* ptr = recAllocStackMem(8, 8);
-ptr[0] = res & 0xffffffff;
-ptr[1] = (res&0x80000000)?0xffffffff:0;
-if( upper ) SSE_MOVHPS_M64_to_XMM(reglo, (u32)ptr);
-else SSE_MOVLPS_M64_to_XMM(reglo, (u32)ptr);
+u32* ptr = recGetImm64(res & 0x80000000 ? -1 : 0, (u32)res);
+if( upper ) SSE_MOVHPS_M64_to_XMM(reglo, (uptr)ptr);
+else SSE_MOVLPS_M64_to_XMM(reglo, (uptr)ptr);
}
else {
MOV32ItoM(loaddr, res & 0xffffffff);
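
For clarity on the (hi, lo) arguments in this hunk and the next one: the 64-bit HI/LO result res is split into two sign-extended constants, one built from its low word for LO and one from its high word for HI. A small hedged check of that splitting (standalone code, not part of the commit):

    #include <cassert>
    #include <cstdint>

    // Mirrors the recGetImm64 arguments in recWritebackConstHILO: each half of the
    // 64-bit result becomes a sign-extended 64-bit constant.
    int main()
    {
        uint64_t res = 0xfffffffe80000001ull;             // example MULT result
        uint32_t lo_word = (uint32_t)res;
        uint32_t lo_ext  = (res & 0x80000000) ? 0xffffffffu : 0u;
        uint32_t hi_word = (uint32_t)(res >> 32);
        uint32_t hi_ext  = (res >> 63) ? 0xffffffffu : 0u;
        assert(lo_word == 0x80000001u && lo_ext == 0xffffffffu);
        assert(hi_word == 0xfffffffeu && hi_ext == 0xffffffffu);
    }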
@@ -291,18 +286,13 @@ void recWritebackConstHILO(u64 res, int writed, int upper)
if( g_pCurInstInfo->regs[XMMGPR_HI] & testlive ) {
if( !upper && (reghi = _allocCheckGPRtoMMX(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE)) >= 0 ) {
-u32* ptr = recAllocStackMem(8, 8);
-ptr[0] = res >> 32;
-ptr[1] = (res>>63)?0xffffffff:0;
-MOVQMtoR(reghi, (u32)ptr);
+MOVQMtoR(reghi, (uptr)recGetImm64(res >> 63 ? -1 : 0, res >> 32));
}
else {
reghi = _allocCheckGPRtoXMM(g_pCurInstInfo, XMMGPR_HI, MODE_WRITE|MODE_READ);
if( reghi >= 0 ) {
-u32* ptr = recAllocStackMem(8, 8);
-ptr[0] = res >> 32;
-ptr[1] = (res>>63)?0xffffffff:0;
+u32* ptr = recGetImm64(res >> 63 ? -1 : 0, res >> 32);
if( upper ) SSE_MOVHPS_M64_to_XMM(reghi, (u32)ptr);
else SSE_MOVLPS_M64_to_XMM(reghi, (u32)ptr);
}