Merge pull request #490 from unknownbrackets/jit-nice-delays

Nice delay slots, ahoy
This commit is contained in:
Henrik Rydgård 2013-01-25 00:57:13 -08:00
commit d062e3cc83
5 changed files with 134 additions and 52 deletions

View File

@ -61,6 +61,7 @@ namespace MIPSAnalyst
return false; //TODO: there are more cases!
}
// TODO: Remove me?
bool IsDelaySlotNice(u32 branch, u32 delayslot)
{
int outReg = GetOutReg(delayslot);
@ -85,6 +86,77 @@ namespace MIPSAnalyst
}
}
// Temporary: returns true only for common ops which have proper flags in the info table.
// The check is done by comparing the op's name (as returned by MIPSGetName) against a
// fixed whitelist of instruction names.
bool IsDelaySlotInfoSafe(u32 op)
{
	// static const: the whitelist is constant data, so there is no reason to
	// re-initialize the pointer array on the stack for every call.
	static const char * const safeOps[] = {
		"addi", "addiu", "slti", "sltiu", "andi", "ori", "xori", "lui",
		"lb", "lh", "lwl", "lw", "lbu", "lhu", "lwr",
		"sb", "sh", "swl", "sw", "swr",
		"sll", "srl", "sra", "sllv", "srlv", "srav",
		"add", "addu", "sub", "subu", "and", "or", "xor", "nor",
		"slt", "sltu",
	};

	const char *opName = MIPSGetName(op);
	// NOTE(review): MIPSGetName is not visible here; guard against a null name so
	// strcmp is never handed a null pointer.  An op without a name is not whitelisted.
	if (opName == 0)
		return false;

	// size_t index avoids a signed/unsigned comparison against the array size.
	for (size_t i = 0; i < ARRAY_SIZE(safeOps); ++i)
	{
		if (strcmp(safeOps[i], opName) == 0)
			return true;
	}
	return false;
}
// Decides whether a delay slot instruction can be treated as "nice" with respect to
// up to two registers (reg1, reg2): it is nice when it is a nop, or when it is a
// whitelisted op whose output register is neither reg1 nor reg2.
// branchOp is accepted for symmetry with the other helpers but is not inspected.
bool IsDelaySlotNiceReg(u32 branchOp, u32 op, int reg1, int reg2)
{
	// A nop (encoding 0) can never interfere with anything.
	if (op == 0)
		return true;

	// TODO: Once the flags are all correct on the tables, remove this safety.
	if (!IsDelaySlotInfoSafe(op))
		return false;

	// Register $0 is hardwired to zero, so it never counts as an output register.
	const bool writesReg1 = reg1 != 0 && GetOutReg(op) == reg1;
	if (writesReg1)
		return false;

	const bool writesReg2 = reg2 != 0 && GetOutReg(op) == reg2;
	return !writesReg2;
}
// Decides whether a delay slot instruction is "nice": it is nice when it is a nop,
// or when it is a whitelisted op that does not carry the IS_VFPU info flag.
// branchOp is currently not inspected.
bool IsDelaySlotNiceVFPU(u32 branchOp, u32 op)
{
	// A nop (encoding 0) is always fine.
	if (op == 0)
		return true;

	// TODO: Once the flags are all correct on the tables, remove this safety.
	if (!IsDelaySlotInfoSafe(op))
		return false;

	// TODO: There may be IS_VFPU cases which are safe...
	const bool isVfpuOp = (MIPSGetInfo(op) & IS_VFPU) != 0;
	return !isVfpuOp;
}
// Decides whether a delay slot instruction is "nice": it is nice when it is a nop,
// or when it is a whitelisted op that does not carry the OUT_FPUFLAG info flag.
// branchOp is currently not inspected.
bool IsDelaySlotNiceFPU(u32 branchOp, u32 op)
{
	// A nop (encoding 0) is always fine.
	if (op == 0)
		return true;

	// TODO: Once the flags are all correct on the tables, remove this safety.
	const bool infoIsTrusted = IsDelaySlotInfoSafe(op);

	// Short-circuit: the info table is only consulted for whitelisted ops.
	return infoIsTrusted && (MIPSGetInfo(op) & OUT_FPUFLAG) == 0;
}
bool IsSyscall(u32 op)
{
// Syscalls look like this: 0000 00-- ---- ---- ---- --00 1100

View File

@ -57,6 +57,9 @@ namespace MIPSAnalyst
int GetOutReg(u32 op);
bool ReadsFromReg(u32 op, u32 reg);
bool IsDelaySlotNice(u32 branch, u32 delayslot);
// Returns true when the delay slot instruction `op` is a nop, or is one of the
// whitelisted common ops and its output register is neither reg1 nor reg2.
// Register 0 ($0) is never considered an output, so leaving reg2 at its default
// of 0 means only reg1 is checked.  branchOp is currently unused.
bool IsDelaySlotNiceReg(u32 branchOp, u32 op, int reg1, int reg2 = 0);
// Returns true when `op` is a nop, or is one of the whitelisted common ops and
// does not have the IS_VFPU info flag.  branchOp is currently unused.
bool IsDelaySlotNiceVFPU(u32 branchOp, u32 op);
// Returns true when `op` is a nop, or is one of the whitelisted common ops and
// does not have the OUT_FPUFLAG info flag.  branchOp is currently unused.
bool IsDelaySlotNiceFPU(u32 branchOp, u32 op);
bool IsSyscall(u32 op);

View File

@ -47,6 +47,10 @@ using namespace MIPSAnalyst;
// #define DO_CONDITIONAL_LOG 1
#define DO_CONDITIONAL_LOG 0
// We can also disable nice delay slots.
// The macro is expanded right after each branch compiler computes its local
// `delaySlotIsNice`.  Enabling the commented-out definition forces that local to
// false; the active definition expands to an empty statement and changes nothing.
// #define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;
#if DO_CONDITIONAL_LOG
#define CONDITIONAL_LOG BranchLog(op);
#define CONDITIONAL_LOG_EXIT(addr) BranchLogExit(op, addr, false);
@ -133,14 +137,10 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);
//Compile the delay slot
bool delaySlotIsNice = GetOutReg(delaySlotOp) != rt && GetOutReg(delaySlotOp) != rs;// IsDelaySlotNice(op, delaySlotOp);
if (!delaySlotIsNice)
{
//ERROR_LOG(CPU, "Not nice delay slot in BranchRSRTComp :( %08x", js.compilerPC);
}
delaySlotIsNice = false; // Until we have time to fully fix this
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (rt == 0)
{
@ -157,13 +157,14 @@ void Jit::BranchRSRTComp(u32 op, Gen::CCFlags cc, bool likely)
Gen::FixupBranch ptr;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = J_CC(cc, true);
}
else
{
ptr = J_CC(cc, true);
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
@ -190,14 +191,11 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
bool delaySlotIsNice = GetOutReg(delaySlotOp) != rs; //IsDelaySlotNice(op, delaySlotOp);
if (!delaySlotIsNice)
{
//ERROR_LOG(CPU, "Not nice delay slot in BranchRSZeroComp :( %08x", js.compilerPC);
}
delaySlotIsNice = false; // Until we have time to fully fix this
gpr.BindToRegister(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
FlushAll();
@ -205,13 +203,14 @@ void Jit::BranchRSZeroComp(u32 op, Gen::CCFlags cc, bool andLink, bool likely)
Gen::FixupBranch ptr;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = J_CC(cc, true);
}
else
{
ptr = J_CC(cc, true);
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
@ -284,14 +283,10 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNice(op, delaySlotOp);
if (!delaySlotIsNice)
{
//ERROR_LOG(CPU, "Not nice delay slot in BranchFPFlag :(");
}
delaySlotIsNice = false; // Until we have time to fully fix this
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
@ -299,13 +294,14 @@ void Jit::BranchFPFlag(u32 op, Gen::CCFlags cc, bool likely)
Gen::FixupBranch ptr;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = J_CC(cc, true);
}
else
{
ptr = J_CC(cc, true);
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
@ -348,14 +344,10 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNice(op, delaySlotOp);
if (!delaySlotIsNice)
{
//ERROR_LOG(CPU, "Not nice delay slot in BranchVFPUFlag :(");
}
delaySlotIsNice = false; // Until we have time to fully fix this
bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
@ -367,13 +359,14 @@ void Jit::BranchVFPUFlag(u32 op, Gen::CCFlags cc, bool likely)
Gen::FixupBranch ptr;
if (!likely)
{
CompileDelaySlot(!delaySlotIsNice);
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = J_CC(cc, true);
}
else
{
ptr = J_CC(cc, true);
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
@ -413,7 +406,8 @@ void Jit::Comp_Jump(u32 op)
}
u32 off = ((op & 0x3FFFFFF) << 2);
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
switch (op >> 26)
{
@ -447,16 +441,15 @@ void Jit::Comp_JumpReg(u32 op)
int rs = _RS;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = GetOutReg(delaySlotOp) != rs;
// Do what with that information?
delaySlotIsNice = false; // Until we have time to fully fix this
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (IsSyscall(delaySlotOp))
{
// If this is a syscall, write the pc (for thread switching and other good reasons.)
gpr.BindToRegister(rs, true, false);
MOV(32, M(&currentMIPS->pc), gpr.R(rs));
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_FLUSH);
// Syscalls write the exit code for us.
_dbg_assert_msg_(JIT, !js.compiling, "Expected syscall to write an exit code.");
@ -464,8 +457,7 @@ void Jit::Comp_JumpReg(u32 op)
}
else if (delaySlotIsNice)
{
// TODO: This flushes which is a waste, could add an extra param to skip.
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_NICE);
MOV(32, R(EAX), gpr.R(rs));
FlushAll();
}
@ -474,8 +466,9 @@ void Jit::Comp_JumpReg(u32 op)
// Latch destination now - save it in memory.
gpr.BindToRegister(rs, true, false);
MOV(32, M(&savedPC), gpr.R(rs));
CompileDelaySlot(false);
CompileDelaySlot(DELAYSLOT_NICE);
MOV(32, R(EAX), M(&savedPC));
FlushAll();
}
switch (op & 0x3f)

View File

@ -128,7 +128,7 @@ void Jit::ClearCacheAt(u32 em_address)
ClearCache();
}
void Jit::CompileDelaySlot(bool saveFlags)
void Jit::CompileDelaySlot(int flags)
{
const u32 addr = js.compilerPC + 4;
@ -136,7 +136,7 @@ void Jit::CompileDelaySlot(bool saveFlags)
// Need to offset the downcount which was already incremented for the branch + delay slot.
CheckJitBreakpoint(addr, -2);
if (saveFlags)
if (flags & DELAYSLOT_SAFE)
SAVE_FLAGS; // preserve flag around the delay slot!
js.inDelaySlot = true;
@ -144,8 +144,9 @@ void Jit::CompileDelaySlot(bool saveFlags)
MIPSCompileOp(op);
js.inDelaySlot = false;
FlushAll();
if (saveFlags)
if (flags & DELAYSLOT_FLUSH)
FlushAll();
if (flags & DELAYSLOT_SAFE)
LOAD_FLAGS; // restore flag!
}

View File

@ -56,6 +56,18 @@ struct JitState
JitBlock *curBlock;
};
// Bit flags for the `flags` argument of Jit::CompileDelaySlot.
// The values are combinable with bitwise OR and tested with bitwise AND.
enum CompileDelaySlotFlags
{
	// Plain compile of the delay slot: no flush afterwards, no flag preservation.
	DELAYSLOT_NICE = 0,

	// Flush registers once the delay slot has been compiled.
	DELAYSLOT_FLUSH = 1 << 0,

	// Save the flags before the delay slot and restore them after it.
	DELAYSLOT_SAFE = 1 << 1,

	// Both of the above: preserve flags and flush registers afterwards.
	DELAYSLOT_SAFE_FLUSH = DELAYSLOT_SAFE | DELAYSLOT_FLUSH,
};
class Jit : public Gen::XCodeBlock
{
public:
@ -71,7 +83,8 @@ public:
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
void CompileDelaySlot(bool saveFlags = false);
// See CompileDelaySlotFlags for flags.
void CompileDelaySlot(int flags);
void CompileAt(u32 addr);
void Comp_RunBlock(u32 op);