Merge pull request #468 from unknownbrackets/jit-match

Match the interpreter better in jit
This commit is contained in:
Henrik Rydgård 2013-01-22 00:43:59 -08:00
commit c2b7fe29df
7 changed files with 70 additions and 55 deletions

View File

@ -73,6 +73,12 @@ static inline void DelayBranchTo(u32 where)
mipsr4k.inDelaySlot = true;
}
// Not-taken path for the "likely" branch variants (beql, bnel, blezl, ...):
// advances PC by 8 instead of the 4 used by the ordinary branches, and
// takes one extra tick off the downcount.
// NOTE(review): the extra tick presumably accounts for the skipped delay
// slot so the interpreter's cycle counting lines up with the jit - confirm.
static inline void SkipLikely()
{
	PC += 8;
	mipsr4k.downcount -= 1;
}
int MIPS_SingleStep()
{
#if defined(ARM)
@ -172,15 +178,15 @@ namespace MIPSInt
switch (op >> 26)
{
case 4: if (R(rt) == R(rs)) DelayBranchTo(addr); else PC += 4; break; //beq
case 5: if (R(rt) != R(rs)) DelayBranchTo(addr); else PC += 4; break; //bne
case 6: if ((s32)R(rs) <= 0) DelayBranchTo(addr); else PC += 4; break; //blez
case 7: if ((s32)R(rs) > 0) DelayBranchTo(addr); else PC += 4; break; //bgtz
case 4: if (R(rt) == R(rs)) DelayBranchTo(addr); else PC += 4; break; //beq
case 5: if (R(rt) != R(rs)) DelayBranchTo(addr); else PC += 4; break; //bne
case 6: if ((s32)R(rs) <= 0) DelayBranchTo(addr); else PC += 4; break; //blez
case 7: if ((s32)R(rs) > 0) DelayBranchTo(addr); else PC += 4; break; //bgtz
case 20: if (R(rt) == R(rs)) DelayBranchTo(addr); else PC += 8; break; //beql
case 21: if (R(rt) != R(rs)) DelayBranchTo(addr); else PC += 8; break; //bnel
case 22: if ((s32)R(rs) <= 0) DelayBranchTo(addr); else PC += 8; break; //blezl
case 23: if ((s32)R(rs) > 0) DelayBranchTo(addr); else PC += 8; break; //bgtzl
case 20: if (R(rt) == R(rs)) DelayBranchTo(addr); else SkipLikely(); break; //beql
case 21: if (R(rt) != R(rs)) DelayBranchTo(addr); else SkipLikely(); break; //bnel
case 22: if ((s32)R(rs) <= 0) DelayBranchTo(addr); else SkipLikely(); break; //blezl
case 23: if ((s32)R(rs) > 0) DelayBranchTo(addr); else SkipLikely(); break; //bgtzl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
@ -198,12 +204,12 @@ namespace MIPSInt
{
case 0: if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
case 1: if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
case 2: if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
case 3: if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
case 2: if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzl
case 3: if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezl
case 16: R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
case 17: R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
case 18: R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
case 19: R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
case 18: R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzl
case 19: R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
@ -223,8 +229,8 @@ namespace MIPSInt
{
case 0: if (!val) DelayBranchTo(addr); else PC += 4; break; //bvf
case 1: if ( val) DelayBranchTo(addr); else PC += 4; break; //bvt
case 2: if (!val) DelayBranchTo(addr); else PC += 8; break; //bvfl
case 3: if ( val) DelayBranchTo(addr); else PC += 8; break; //bvtl
case 2: if (!val) DelayBranchTo(addr); else SkipLikely(); break; //bvfl
case 3: if ( val) DelayBranchTo(addr); else SkipLikely(); break; //bvtl
}
}
@ -234,10 +240,10 @@ namespace MIPSInt
u32 addr = PC + imm + 4;
switch((op>>16)&0x1f)
{
case 0: if (!currentMIPS->fpcond) DelayBranchTo(addr); else PC += 4; break;//bc1f
case 0: if (!currentMIPS->fpcond) DelayBranchTo(addr); else PC += 4; break;//bc1f
case 1: if ( currentMIPS->fpcond) DelayBranchTo(addr); else PC += 4; break;//bc1t
case 2: if (!currentMIPS->fpcond) DelayBranchTo(addr); else PC += 8; break;//bc1fl
case 3: if ( currentMIPS->fpcond) DelayBranchTo(addr); else PC += 8; break;//bc1tl
case 2: if (!currentMIPS->fpcond) DelayBranchTo(addr); else SkipLikely(); break;//bc1fl
case 3: if ( currentMIPS->fpcond) DelayBranchTo(addr); else SkipLikely(); break;//bc1tl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;

View File

@ -947,6 +947,8 @@ int MIPSInterpret_RunUntil(u64 globalTicks)
MIPSState *curMips = currentMIPS;
while (coreState == CORE_RUNNING)
{
CoreTiming::Advance();
// NEVER stop in a delay slot!
while (curMips->downcount >= 0 && coreState == CORE_RUNNING)
{
@ -1002,8 +1004,6 @@ int MIPSInterpret_RunUntil(u64 globalTicks)
return 1;
}
}
CoreTiming::Advance();
}
return 1;
@ -1026,6 +1026,8 @@ int MIPSInterpret_RunFastUntil(u64 globalTicks)
MIPSState *curMips = currentMIPS;
while (coreState == CORE_RUNNING)
{
CoreTiming::Advance();
while (curMips->downcount >= 0 && coreState == CORE_RUNNING) // TODO: Try to get rid of the latter check
{
again:
@ -1127,8 +1129,6 @@ int MIPSInterpret_RunFastUntil(u64 globalTicks)
goto again;
}
}
CoreTiming::Advance();
}
return 1;
}

View File

@ -99,8 +99,7 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
#ifdef _M_IX86
AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
_assert_msg_(CPU, Memory::base != 0, "Memory base bogus");
MOV(32, R(EDX), Imm32((u32)Memory::base));
MOV(32, R(EAX), MComplex(EDX, EAX, SCALE_1, 0));
MOV(32, R(EAX), MDisp(EAX, (u32)Memory::base));
#elif _M_X64
MOV(32, R(EAX), MComplex(RBX, RAX, SCALE_1, 0));
#endif

View File

@ -154,7 +154,6 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
}
FlushAll();
js.inDelaySlot = true;
Gen::FixupBranch ptr;
if (!likely)
{
@ -166,7 +165,6 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
ptr = J_CC(cc, true);
CompileDelaySlot(false);
}
js.inDelaySlot = false;
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
@ -205,7 +203,6 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool likely)
FlushAll();
Gen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
@ -216,7 +213,6 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool likely)
ptr = J_CC(cc, true);
CompileDelaySlot(false);
}
js.inDelaySlot = false;
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
@ -295,7 +291,6 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
TEST(32, M((void *)&(mips_->fpcond)), Imm32(1));
Gen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
@ -306,7 +301,6 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
ptr = J_CC(cc, true);
CompileDelaySlot(false);
}
js.inDelaySlot = false;
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
@ -365,7 +359,6 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
//int val = (mips_->vfpuCtrl[VFPU_CTRL_CC] >> imm3) & 1;
TEST(32, M((void *)&(mips_->vfpuCtrl[VFPU_CTRL_CC])), Imm32(1 << imm3));
Gen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
@ -376,7 +369,6 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
ptr = J_CC(cc, true);
CompileDelaySlot(false);
}
js.inDelaySlot = false;
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
@ -455,7 +447,8 @@ void Jit::Comp_JumpReg(u32 op)
if (delaySlotIsNice)
{
CompileAt(js.compilerPC + 4);
// TODO: This flushes which is a waste, could add an extra param to skip.
CompileDelaySlot(false);
MOV(32, R(EAX), gpr.R(rs));
FlushAll();
}
@ -497,6 +490,11 @@ void Jit::Comp_Syscall(u32 op)
{
FlushAll();
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDowncount(offset);
js.downcountAmount = -offset;
ABI_CallFunctionC((void *)&CallSyscall, op);
WriteSyscallExit();

View File

@ -110,6 +110,12 @@ void Jit::FlushAll()
fpr.Flush(FLUSH_ALL);
}
// Emits a SUB that removes (js.downcountAmount + offset) from
// currentMIPS->downcount.
//
// offset: signed adjustment added to the block's accumulated downcount
//         amount before it is emitted (callers in this file pass 0, -1 or -2).
void Jit::WriteDowncount(int offset)
{
	const int cycles = js.downcountAmount + offset;
	// Values above 127 get a 32-bit immediate; everything else is emitted
	// as an 8-bit immediate.
	// NOTE(review): presumably this relies on the imm8 form being
	// sign-extended by the CPU - confirm against the emitter.
	if (cycles > 127)
	{
		SUB(32, M(&currentMIPS->downcount), Imm32(cycles));
	}
	else
	{
		SUB(32, M(&currentMIPS->downcount), Imm8(cycles));
	}
}
void Jit::ClearCache()
{
blocks.Clear();
@ -127,13 +133,16 @@ void Jit::CompileDelaySlot(bool saveFlags)
const u32 addr = js.compilerPC + 4;
// TODO: If we ever support conditional breakpoints, we need to handle the flags more carefully.
CheckJitBreakpoint(addr);
// Need to offset the downcount which was already incremented for the branch + delay slot.
CheckJitBreakpoint(addr, -2);
if (saveFlags)
SAVE_FLAGS; // preserve flag around the delay slot!
js.inDelaySlot = true;
u32 op = Memory::Read_Instruction(addr);
MIPSCompileOp(op);
js.inDelaySlot = false;
FlushAll();
if (saveFlags)
@ -142,7 +151,7 @@ void Jit::CompileDelaySlot(bool saveFlags)
void Jit::CompileAt(u32 addr)
{
CheckJitBreakpoint(addr);
CheckJitBreakpoint(addr, 0);
u32 op = Memory::Read_Instruction(addr);
MIPSCompileOp(op);
}
@ -194,12 +203,12 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
int numInstructions = 0;
while (js.compiling)
{
// Jit breakpoints are quite fast, so let's do them in release too.
CheckJitBreakpoint(js.compilerPC, 0);
u32 inst = Memory::Read_Instruction(js.compilerPC);
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
// Jit breakpoints are quite fast, so let's do them in release too.
CheckJitBreakpoint(js.compilerPC);
MIPSCompileOp(inst);
js.compilerPC += 4;
@ -237,7 +246,7 @@ void Jit::Comp_Generic(u32 op)
void Jit::WriteExit(u32 destination, int exit_num)
{
SUB(32, M(&currentMIPS->downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
WriteDowncount();
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
@ -259,26 +268,28 @@ void Jit::WriteExit(u32 destination, int exit_num)
void Jit::WriteExitDestInEAX()
{
// TODO: Some wasted potential, dispatcher will alwa
// TODO: Some wasted potential, dispatcher will always read this back into EAX.
MOV(32, M(&mips_->pc), R(EAX));
SUB(32, M(&currentMIPS->downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
WriteDowncount();
JMP(asm_.dispatcher, true);
}
void Jit::WriteSyscallExit()
{
SUB(32, M(&currentMIPS->downcount), js.downcountAmount > 127 ? Imm32(js.downcountAmount) : Imm8(js.downcountAmount));
WriteDowncount();
JMP(asm_.dispatcherCheckCoreState, true);
}
bool Jit::CheckJitBreakpoint(u32 addr)
bool Jit::CheckJitBreakpoint(u32 addr, int downcountOffset)
{
if (CBreakPoints::IsAddressBreakPoint(addr))
{
FlushAll();
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
CALL((void *)&JitBreakpoint);
WriteSyscallExit();
WriteDowncount(downcountOffset);
JMP(asm_.dispatcherCheckCoreState, true);
return true;
}

View File

@ -103,12 +103,13 @@ public:
void ClearCacheAt(u32 em_address);
private:
void FlushAll();
void WriteDowncount(int offset = 0);
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInEAX();
// void WriteRfiExitDestInEAX();
void WriteSyscallExit();
bool CheckJitBreakpoint(u32 addr);
bool CheckJitBreakpoint(u32 addr, int downcountOffset);
// Utility compilation functions
void BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely);

View File

@ -15,6 +15,7 @@
#include "../../Core/Core.h"
#include "../../Core/CPU.h"
#include "../../Core/HLE/HLE.h"
#include "../../Core/CoreTiming.h"
#include "base/stringutil.h"
@ -183,7 +184,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam)
Sleep(1);
_dbg_update_();
ptr->gotoPC();
reglist->redraw();
UpdateDialog();
vfpudlg->Update();
}
break;
@ -197,7 +198,7 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam)
MainWindow::UpdateMenus();
Sleep(1);
ptr->gotoPC();
reglist->redraw();
UpdateDialog();
}
break;
@ -217,10 +218,9 @@ BOOL CDisasm::DlgProc(UINT message, WPARAM wParam, LPARAM lParam)
Core_EnableStepping(true);
_dbg_update_();
MainWindow::UpdateMenus();
UpdateDialog();
Sleep(1); //let cpu catch up
ptr->gotoPC();
reglist->redraw();
UpdateDialog();
vfpudlg->Update();
}
break;
@ -359,6 +359,7 @@ void CDisasm::SetDebugMode(bool _bDebug)
ptr->gotoPC();
// update the callstack
//CDisam::blah blah
UpdateDialog();
}
else
{
@ -368,9 +369,9 @@ void CDisasm::SetDebugMode(bool _bDebug)
EnableWindow( GetDlgItem(hDlg, IDC_STEPHLE), FALSE);
EnableWindow( GetDlgItem(hDlg, IDC_STOP), TRUE);
EnableWindow( GetDlgItem(hDlg, IDC_SKIP), FALSE);
CtrlRegisterList *reglist = CtrlRegisterList::getFrom(GetDlgItem(m_hDlg,IDC_REGLIST));
reglist->redraw();
}
CtrlRegisterList *reglist = CtrlRegisterList::getFrom(GetDlgItem(m_hDlg,IDC_REGLIST));
reglist->redraw();
}
void CDisasm::NotifyMapLoaded()
@ -408,11 +409,10 @@ void CDisasm::UpdateDialog(bool _bComplete)
CtrlRegisterList *rl = CtrlRegisterList::getFrom(GetDlgItem(m_hDlg,IDC_REGLIST));
rl->redraw();
// Update Debug Counter
/*
char szBuffer[32];
sprintf(szBuffer, "%04X%08X", PowerPC::ppcState.TU,PowerPC::ppcState.TL);
SetDlgItemText(m_hDlg, IDC_DEBUG_COUNT, szBuffer);
*/
char tempTicks[24];
sprintf(tempTicks, "%lld", CoreTiming::GetTicks());
SetDlgItemText(m_hDlg, IDC_DEBUG_COUNT, tempTicks);
// Update Register Dialog
for (int i=0; i<numCPUs; i++)
if (memoryWindow[i])