mirror of
https://github.com/libretro/ppsspp.git
synced 2025-02-21 09:21:02 +00:00
Merge pull request #4506 from unknownbrackets/jit-continuing
Further attempts at continuing jit blocks (disabled)
This commit is contained in:
commit
9afe69fa62
@ -65,6 +65,34 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
|
||||
MIPSGPReg rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions) {
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool skipBranch;
|
||||
s32 rsImm = (s32)gpr.GetImm(rs);
|
||||
s32 rtImm = (s32)gpr.GetImm(rt);
|
||||
|
||||
switch (cc) {
|
||||
case CC_EQ: skipBranch = rsImm == rtImm; break;
|
||||
case CC_NEQ: skipBranch = rsImm != rtImm; break;
|
||||
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
|
||||
}
|
||||
|
||||
if (skipBranch) {
|
||||
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||
if (likely)
|
||||
js.compilerPC += 4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
@ -129,6 +157,38 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool
|
||||
MIPSGPReg rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool skipBranch;
|
||||
s32 imm = (s32)gpr.GetImm(rs);
|
||||
|
||||
switch (cc) {
|
||||
case CC_GT: skipBranch = imm > 0; break;
|
||||
case CC_GE: skipBranch = imm >= 0; break;
|
||||
case CC_LT: skipBranch = imm < 0; break;
|
||||
case CC_LE: skipBranch = imm <= 0; break;
|
||||
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
|
||||
}
|
||||
|
||||
if (skipBranch) {
|
||||
// Skip the delay slot if likely, otherwise it'll be the next instruction.
|
||||
if (likely)
|
||||
js.compilerPC += 4;
|
||||
return;
|
||||
}
|
||||
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (andLink)
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
@ -346,18 +406,42 @@ void Jit::Comp_Jump(MIPSOpcode op)
|
||||
u32 off = _IMM26 << 2;
|
||||
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
|
||||
|
||||
// Might be a stubbed address or something?
|
||||
if (!Memory::IsValidAddress(targetAddr))
|
||||
{
|
||||
if (js.nextExit == 0)
|
||||
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr)
|
||||
else
|
||||
js.compiling = false;
|
||||
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
|
||||
return;
|
||||
}
|
||||
|
||||
switch (op >> 26)
|
||||
{
|
||||
case 2: //j
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
FlushAll();
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
case 3: //jal
|
||||
gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
|
||||
gpr.SetRegImm(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
FlushAll();
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
@ -392,8 +476,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||
return; // Syscall wrote exit code.
|
||||
} else if (delaySlotIsNice) {
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
gpr.MapReg(rs);
|
||||
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
|
||||
|
||||
if (rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
|
||||
// According to the MIPS ABI, there are some regs we don't need to preserve.
|
||||
// Let's discard them so we don't need to write them back.
|
||||
@ -405,6 +488,20 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||
gpr.DiscardR(MIPS_REG_T8);
|
||||
gpr.DiscardR(MIPS_REG_T9);
|
||||
}
|
||||
|
||||
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = gpr.GetImm(rs) - 4;
|
||||
if ((op & 0x3f) == 9) {
|
||||
gpr.SetImm(rd, js.compilerPC + 8);
|
||||
}
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
gpr.MapReg(rs);
|
||||
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
|
||||
FlushAll();
|
||||
} else {
|
||||
// Delay slot - this case is very rare, might be able to free up R8.
|
||||
@ -434,13 +531,14 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||
|
||||
void Jit::Comp_Syscall(MIPSOpcode op)
|
||||
{
|
||||
FlushAll();
|
||||
|
||||
// If we're in a delay slot, this is off by one.
|
||||
const int offset = js.inDelaySlot ? -1 : 0;
|
||||
WriteDownCount(offset);
|
||||
js.downcountAmount = -offset;
|
||||
|
||||
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
|
||||
FlushAll();
|
||||
|
||||
SaveDowncount();
|
||||
// Skip the CallSyscall where possible.
|
||||
void *quickFunc = GetQuickSyscallFunc(op);
|
||||
|
@ -64,6 +64,12 @@ ArmJitOptions::ArmJitOptions()
|
||||
useBackJump = false;
|
||||
useForwardJump = false;
|
||||
cachePointers = true;
|
||||
// WARNING: These options don't work properly with cache clearing or jit compare.
|
||||
// Need to find a smart way to handle before enabling.
|
||||
immBranches = false;
|
||||
continueBranches = false;
|
||||
continueJumps = false;
|
||||
continueMaxInstructions = 300;
|
||||
}
|
||||
|
||||
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips)
|
||||
@ -154,6 +160,7 @@ void Jit::EatInstruction(MIPSOpcode op) {
|
||||
ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.")
|
||||
}
|
||||
|
||||
js.numInstructions++;
|
||||
js.compilerPC += 4;
|
||||
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
|
||||
}
|
||||
@ -256,10 +263,9 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
||||
gpr.Start(analysis);
|
||||
fpr.Start(analysis);
|
||||
|
||||
int numInstructions = 0;
|
||||
int cycles = 0;
|
||||
int partialFlushOffset = 0;
|
||||
|
||||
js.numInstructions = 0;
|
||||
while (js.compiling)
|
||||
{
|
||||
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
|
||||
@ -270,7 +276,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
||||
MIPSCompileOp(inst);
|
||||
|
||||
js.compilerPC += 4;
|
||||
numInstructions++;
|
||||
js.numInstructions++;
|
||||
if (!cpu_info.bArmV7 && (GetCodePtr() - b->checkedEntry - partialFlushOffset) > 3200)
|
||||
{
|
||||
// We need to prematurely flush as we are out of range
|
||||
@ -279,6 +285,14 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
||||
SetJumpTarget(skip);
|
||||
partialFlushOffset = GetCodePtr() - b->checkedEntry;
|
||||
}
|
||||
|
||||
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
|
||||
if (GetSpaceLeft() < 0x800)
|
||||
{
|
||||
FlushAll();
|
||||
WriteExit(js.compilerPC, js.nextExit++);
|
||||
js.compiling = false;
|
||||
}
|
||||
}
|
||||
|
||||
if (jo.useForwardJump) {
|
||||
@ -312,7 +326,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
|
||||
// Don't forget to zap the newly written instructions in the instruction cache!
|
||||
FlushIcache();
|
||||
|
||||
b->originalSize = numInstructions;
|
||||
b->originalSize = js.numInstructions;
|
||||
return b->normalEntry;
|
||||
}
|
||||
|
||||
|
@ -41,6 +41,10 @@ struct ArmJitOptions
|
||||
bool useBackJump;
|
||||
bool useForwardJump;
|
||||
bool cachePointers;
|
||||
bool immBranches;
|
||||
bool continueBranches;
|
||||
bool continueJumps;
|
||||
int continueMaxInstructions;
|
||||
};
|
||||
|
||||
class Jit : public ArmGen::ARMXCodeBlock
|
||||
|
@ -75,9 +75,9 @@ namespace MIPSComp
|
||||
case 8: // same as addiu?
|
||||
case 9: // R(rt) = R(rs) + simm; break; //addiu
|
||||
{
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) + simm);
|
||||
gpr.SetImm(rt, gpr.GetImm(rs) + simm);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -97,9 +97,9 @@ namespace MIPSComp
|
||||
|
||||
case 10: // R(rt) = (s32)R(rs) < simm; break; //slti
|
||||
// There's a mips compiler out there asking questions it already knows the answer to...
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
gpr.SetImmediate32(rt, (s32)gpr.GetImmediate32(rs) < simm);
|
||||
gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -114,9 +114,9 @@ namespace MIPSComp
|
||||
break;
|
||||
|
||||
case 11: // R(rt) = R(rs) < uimm; break; //sltiu
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) < uimm);
|
||||
gpr.SetImm(rt, gpr.GetImm(rs) < uimm);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -132,29 +132,29 @@ namespace MIPSComp
|
||||
|
||||
case 12: // R(rt) = R(rs) & uimm; break; //andi
|
||||
if (uimm == 0)
|
||||
gpr.SetImmediate32(rt, 0);
|
||||
else if (gpr.IsImmediate(rs))
|
||||
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) & uimm);
|
||||
gpr.SetImm(rt, 0);
|
||||
else if (gpr.IsImm(rs))
|
||||
gpr.SetImm(rt, gpr.GetImm(rs) & uimm);
|
||||
else
|
||||
CompImmLogic(op, &XEmitter::AND);
|
||||
break;
|
||||
|
||||
case 13: // R(rt) = R(rs) | uimm; break; //ori
|
||||
if (gpr.IsImmediate(rs))
|
||||
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) | uimm);
|
||||
if (gpr.IsImm(rs))
|
||||
gpr.SetImm(rt, gpr.GetImm(rs) | uimm);
|
||||
else
|
||||
CompImmLogic(op, &XEmitter::OR);
|
||||
break;
|
||||
|
||||
case 14: // R(rt) = R(rs) ^ uimm; break; //xori
|
||||
if (gpr.IsImmediate(rs))
|
||||
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) ^ uimm);
|
||||
if (gpr.IsImm(rs))
|
||||
gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm);
|
||||
else
|
||||
CompImmLogic(op, &XEmitter::XOR);
|
||||
break;
|
||||
|
||||
case 15: //R(rt) = uimm << 16; break; //lui
|
||||
gpr.SetImmediate32(rt, uimm << 16);
|
||||
gpr.SetImm(rt, uimm << 16);
|
||||
break;
|
||||
|
||||
default:
|
||||
@ -176,9 +176,9 @@ namespace MIPSComp
|
||||
switch (op & 63)
|
||||
{
|
||||
case 22: //clz
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
u32 value = gpr.GetImmediate32(rs);
|
||||
u32 value = gpr.GetImm(rs);
|
||||
int x = 31;
|
||||
int count = 0;
|
||||
while (!(value & (1 << x)) && x >= 0)
|
||||
@ -186,7 +186,7 @@ namespace MIPSComp
|
||||
count++;
|
||||
x--;
|
||||
}
|
||||
gpr.SetImmediate32(rd, count);
|
||||
gpr.SetImm(rd, count);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -207,9 +207,9 @@ namespace MIPSComp
|
||||
}
|
||||
break;
|
||||
case 23: //clo
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
u32 value = gpr.GetImmediate32(rs);
|
||||
u32 value = gpr.GetImm(rs);
|
||||
int x = 31;
|
||||
int count = 0;
|
||||
while ((value & (1 << x)) && x >= 0)
|
||||
@ -217,7 +217,7 @@ namespace MIPSComp
|
||||
count++;
|
||||
x--;
|
||||
}
|
||||
gpr.SetImmediate32(rd, count);
|
||||
gpr.SetImm(rd, count);
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -277,16 +277,16 @@ namespace MIPSComp
|
||||
MIPSGPReg rd = _RD;
|
||||
|
||||
// Yes, this happens. Let's make it fast.
|
||||
if (doImm && gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
{
|
||||
gpr.SetImmediate32(rd, doImm(gpr.GetImmediate32(rs), gpr.GetImmediate32(rt)));
|
||||
gpr.SetImm(rd, doImm(gpr.GetImm(rs), gpr.GetImm(rt)));
|
||||
return;
|
||||
}
|
||||
|
||||
// Act like zero was used if the operand is equivalent. This happens.
|
||||
if (gpr.IsImmediate(rs) && gpr.GetImmediate32(rs) == 0)
|
||||
if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0)
|
||||
rs = MIPS_REG_ZERO;
|
||||
if (gpr.IsImmediate(rt) && gpr.GetImmediate32(rt) == 0)
|
||||
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
|
||||
rt = MIPS_REG_ZERO;
|
||||
|
||||
gpr.Lock(rt, rs, rd);
|
||||
@ -294,7 +294,7 @@ namespace MIPSComp
|
||||
if (rt == MIPS_REG_ZERO || (rs == MIPS_REG_ZERO && doImm != &RType3_ImmSub))
|
||||
{
|
||||
if (doImm == &RType3_ImmAnd)
|
||||
gpr.SetImmediate32(rd, 0);
|
||||
gpr.SetImm(rd, 0);
|
||||
else
|
||||
{
|
||||
MIPSGPReg rsource = rt == MIPS_REG_ZERO ? rs : rt;
|
||||
@ -305,10 +305,10 @@ namespace MIPSComp
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (gpr.IsImmediate(rt))
|
||||
else if (gpr.IsImm(rt))
|
||||
{
|
||||
// No temporary needed.
|
||||
u32 rtval = gpr.GetImmediate32(rt);
|
||||
u32 rtval = gpr.GetImm(rt);
|
||||
gpr.MapReg(rd, rs == rd, true);
|
||||
if (rs != rd)
|
||||
MOV(32, gpr.R(rd), gpr.R(rs));
|
||||
@ -345,7 +345,7 @@ namespace MIPSComp
|
||||
if (rd == rs)
|
||||
break;
|
||||
gpr.Lock(rt, rs, rd);
|
||||
if (!gpr.IsImmediate(rt))
|
||||
if (!gpr.IsImm(rt))
|
||||
{
|
||||
gpr.KillImmediate(rs, true, false);
|
||||
// Need to load rd in case the condition fails.
|
||||
@ -353,11 +353,11 @@ namespace MIPSComp
|
||||
CMP(32, gpr.R(rt), Imm32(0));
|
||||
CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_E);
|
||||
}
|
||||
else if (gpr.GetImmediate32(rt) == 0)
|
||||
else if (gpr.GetImm(rt) == 0)
|
||||
{
|
||||
// Yes, this actually happens.
|
||||
if (gpr.IsImmediate(rs))
|
||||
gpr.SetImmediate32(rd, gpr.GetImmediate32(rs));
|
||||
if (gpr.IsImm(rs))
|
||||
gpr.SetImm(rd, gpr.GetImm(rs));
|
||||
else if (rd != rs)
|
||||
{
|
||||
gpr.MapReg(rd, false, true);
|
||||
@ -371,7 +371,7 @@ namespace MIPSComp
|
||||
if (rd == rs)
|
||||
break;
|
||||
gpr.Lock(rt, rs, rd);
|
||||
if (!gpr.IsImmediate(rt))
|
||||
if (!gpr.IsImm(rt))
|
||||
{
|
||||
gpr.KillImmediate(rs, true, false);
|
||||
// Need to load rd in case the condition fails.
|
||||
@ -379,10 +379,10 @@ namespace MIPSComp
|
||||
CMP(32, gpr.R(rt), Imm32(0));
|
||||
CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_NE);
|
||||
}
|
||||
else if (gpr.GetImmediate32(rt) != 0)
|
||||
else if (gpr.GetImm(rt) != 0)
|
||||
{
|
||||
if (gpr.IsImmediate(rs))
|
||||
gpr.SetImmediate32(rd, gpr.GetImmediate32(rs));
|
||||
if (gpr.IsImm(rs))
|
||||
gpr.SetImm(rd, gpr.GetImm(rs));
|
||||
else if (rd != rs)
|
||||
{
|
||||
gpr.MapReg(rd, false, true);
|
||||
@ -412,15 +412,15 @@ namespace MIPSComp
|
||||
|
||||
case 39: // R(rd) = ~(R(rs) | R(rt)); //nor
|
||||
CompTriArith(op, &XEmitter::OR, &RType3_ImmOr);
|
||||
if (gpr.IsImmediate(rd))
|
||||
gpr.SetImmediate32(rd, ~gpr.GetImmediate32(rd));
|
||||
if (gpr.IsImm(rd))
|
||||
gpr.SetImm(rd, ~gpr.GetImm(rd));
|
||||
else
|
||||
NOT(32, gpr.R(rd));
|
||||
break;
|
||||
|
||||
case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt
|
||||
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
gpr.SetImmediate32(rd, (s32)gpr.GetImmediate32(rs) < (s32)gpr.GetImmediate32(rt));
|
||||
if (gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt));
|
||||
else
|
||||
{
|
||||
gpr.Lock(rt, rs, rd);
|
||||
@ -435,8 +435,8 @@ namespace MIPSComp
|
||||
break;
|
||||
|
||||
case 43: //R(rd) = R(rs) < R(rt); break; //sltu
|
||||
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
gpr.SetImmediate32(rd, gpr.GetImmediate32(rs) < gpr.GetImmediate32(rt));
|
||||
if (gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt));
|
||||
else
|
||||
{
|
||||
gpr.Lock(rd, rs, rt);
|
||||
@ -451,8 +451,8 @@ namespace MIPSComp
|
||||
break;
|
||||
|
||||
case 44: //R(rd) = (R(rs) > R(rt)) ? R(rs) : R(rt); break; //max
|
||||
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
gpr.SetImmediate32(rd, std::max((s32)gpr.GetImmediate32(rs), (s32)gpr.GetImmediate32(rt)));
|
||||
if (gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
gpr.SetImm(rd, std::max((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));
|
||||
else
|
||||
{
|
||||
MIPSGPReg rsrc = rd == rt ? rs : rt;
|
||||
@ -468,8 +468,8 @@ namespace MIPSComp
|
||||
break;
|
||||
|
||||
case 45: //R(rd) = (R(rs) < R(rt)) ? R(rs) : R(rt); break; //min
|
||||
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
gpr.SetImmediate32(rd, std::min((s32)gpr.GetImmediate32(rs), (s32)gpr.GetImmediate32(rt)));
|
||||
if (gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
gpr.SetImm(rd, std::min((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));
|
||||
else
|
||||
{
|
||||
MIPSGPReg rsrc = rd == rt ? rs : rt;
|
||||
@ -517,9 +517,9 @@ namespace MIPSComp
|
||||
MIPSGPReg rt = _RT;
|
||||
int sa = _SA;
|
||||
|
||||
if (doImm && gpr.IsImmediate(rt))
|
||||
if (doImm && gpr.IsImm(rt))
|
||||
{
|
||||
gpr.SetImmediate32(rd, doImm(gpr.GetImmediate32(rt), sa));
|
||||
gpr.SetImm(rd, doImm(gpr.GetImm(rt), sa));
|
||||
return;
|
||||
}
|
||||
|
||||
@ -538,16 +538,16 @@ namespace MIPSComp
|
||||
MIPSGPReg rt = _RT;
|
||||
MIPSGPReg rs = _RS;
|
||||
|
||||
if (doImm && gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
|
||||
if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt))
|
||||
{
|
||||
gpr.SetImmediate32(rd, doImm(gpr.GetImmediate32(rt), gpr.GetImmediate32(rs)));
|
||||
gpr.SetImm(rd, doImm(gpr.GetImm(rt), gpr.GetImm(rs)));
|
||||
return;
|
||||
}
|
||||
|
||||
gpr.Lock(rd, rt, rs);
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
int sa = gpr.GetImmediate32(rs);
|
||||
int sa = gpr.GetImm(rs);
|
||||
gpr.MapReg(rd, rd == rt, true);
|
||||
if (rd != rt)
|
||||
MOV(32, gpr.R(rd), gpr.R(rt));
|
||||
@ -612,9 +612,9 @@ namespace MIPSComp
|
||||
switch (op & 0x3f)
|
||||
{
|
||||
case 0x0: //ext
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
gpr.SetImmediate32(rt, (gpr.GetImmediate32(rs) >> pos) & mask);
|
||||
gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -631,12 +631,12 @@ namespace MIPSComp
|
||||
{
|
||||
u32 sourcemask = mask >> pos;
|
||||
u32 destmask = ~(sourcemask << pos);
|
||||
if (gpr.IsImmediate(rs))
|
||||
if (gpr.IsImm(rs))
|
||||
{
|
||||
u32 inserted = (gpr.GetImmediate32(rs) & sourcemask) << pos;
|
||||
if (gpr.IsImmediate(rt))
|
||||
u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;
|
||||
if (gpr.IsImm(rt))
|
||||
{
|
||||
gpr.SetImmediate32(rt, (gpr.GetImmediate32(rt) & destmask) | inserted);
|
||||
gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);
|
||||
return;
|
||||
}
|
||||
|
||||
@ -675,9 +675,9 @@ namespace MIPSComp
|
||||
switch ((op >> 6) & 31)
|
||||
{
|
||||
case 16: // seb // R(rd) = (u32)(s32)(s8)(u8)R(rt);
|
||||
if (gpr.IsImmediate(rt))
|
||||
if (gpr.IsImm(rt))
|
||||
{
|
||||
gpr.SetImmediate32(rd, (u32)(s32)(s8)(u8)gpr.GetImmediate32(rt));
|
||||
gpr.SetImm(rd, (u32)(s32)(s8)(u8)gpr.GetImm(rt));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -700,10 +700,10 @@ namespace MIPSComp
|
||||
break;
|
||||
|
||||
case 20: //bitrev
|
||||
if (gpr.IsImmediate(rt))
|
||||
if (gpr.IsImm(rt))
|
||||
{
|
||||
// http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
|
||||
u32 v = gpr.GetImmediate32(rt);
|
||||
u32 v = gpr.GetImm(rt);
|
||||
// swap odd and even bits
|
||||
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
|
||||
// swap consecutive pairs
|
||||
@ -714,7 +714,7 @@ namespace MIPSComp
|
||||
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
|
||||
// swap 2-byte long pairs
|
||||
v = ( v >> 16 ) | ( v << 16);
|
||||
gpr.SetImmediate32(rd, v);
|
||||
gpr.SetImm(rd, v);
|
||||
break;
|
||||
}
|
||||
|
||||
@ -755,9 +755,9 @@ namespace MIPSComp
|
||||
break;
|
||||
|
||||
case 24: // seh // R(rd) = (u32)(s32)(s16)(u16)R(rt);
|
||||
if (gpr.IsImmediate(rt))
|
||||
if (gpr.IsImm(rt))
|
||||
{
|
||||
gpr.SetImmediate32(rd, (u32)(s32)(s16)(u16)gpr.GetImmediate32(rt));
|
||||
gpr.SetImm(rd, (u32)(s32)(s16)(u16)gpr.GetImm(rt));
|
||||
break;
|
||||
}
|
||||
|
||||
@ -785,9 +785,9 @@ namespace MIPSComp
|
||||
switch (op & 0x3ff)
|
||||
{
|
||||
case 0xA0: //wsbh
|
||||
if (gpr.IsImmediate(rt)) {
|
||||
u32 rtImm = gpr.GetImmediate32(rt);
|
||||
gpr.SetImmediate32(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));
|
||||
if (gpr.IsImm(rt)) {
|
||||
u32 rtImm = gpr.GetImm(rt);
|
||||
gpr.SetImm(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));
|
||||
break;
|
||||
}
|
||||
gpr.Lock(rd, rt);
|
||||
@ -800,8 +800,8 @@ namespace MIPSComp
|
||||
gpr.UnlockAll();
|
||||
break;
|
||||
case 0xE0: //wsbw
|
||||
if (gpr.IsImmediate(rt)) {
|
||||
gpr.SetImmediate32(rd, swap32(gpr.GetImmediate32(rt)));
|
||||
if (gpr.IsImm(rt)) {
|
||||
gpr.SetImm(rd, swap32(gpr.GetImm(rt)));
|
||||
break;
|
||||
}
|
||||
gpr.Lock(rd, rt);
|
||||
|
@ -129,6 +129,42 @@ void Jit::BranchLogExit(MIPSOpcode op, u32 dest, bool useEAX)
|
||||
SetJumpTarget(skip);
|
||||
}
|
||||
|
||||
// Returns the logical negation of an emitter condition code, e.g. CC_Z -> CC_NZ.
// Used when a branch is emitted with its sense inverted so that the other
// path can fall through inline.
//
// flag: the condition code to invert.
// Returns the opposite condition. If flag is not one of the handled values,
// an error is reported and CC_O is returned as a fallback.
static CCFlags FlipCCFlag(CCFlags flag)
{
	switch (flag)
	{
	case CC_O: return CC_NO;
	case CC_NO: return CC_O;
	case CC_B: return CC_NB;
	case CC_NB: return CC_B;
	case CC_Z: return CC_NZ;
	case CC_NZ: return CC_Z;
	case CC_BE: return CC_NBE;
	case CC_NBE: return CC_BE;
	case CC_S: return CC_NS;
	case CC_NS: return CC_S;
	case CC_P: return CC_NP;
	case CC_NP: return CC_P;
	case CC_L: return CC_NL;
	case CC_NL: return CC_L;
	case CC_LE: return CC_NLE;
	case CC_NLE: return CC_LE;
	}

	// The format string must directly follow the log category. A stray
	// `false` used to sit in its place and was consumed as the format.
	// The enum is cast explicitly so that it matches the %d specifier.
	ERROR_LOG_REPORT(JIT, "FlipCCFlag: Unexpected CC flag: %d", static_cast<int>(flag));
	return CC_O;
}
|
||||
|
||||
// Heuristic guess at whether a conditional branch will be taken, so the
// caller can choose which path to keep compiling inline.
//
// targetAddr: address the branch jumps to when taken.
// likely:     true for the "likely" branch variants.
// Returns true when the branch is predicted to be taken.
bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
	// A "likely" branch is simply assumed to be taken.
	//
	// For all others, predict taken only when the target lies above the
	// current compile position (a forward branch).
	// TODO: Conventional prediction would instead favor branches going back
	// to lower addresses, but at the time this was written that measured
	// worse; the reversed test performed the same or better.
	return likely || targetAddr > js.compilerPC;
}
|
||||
|
||||
void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
{
|
||||
CONDITIONAL_LOG;
|
||||
@ -141,16 +177,12 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
MIPSGPReg rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt) && js.numInstructions < jo.continueMaxInstructions)
|
||||
if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions)
|
||||
{
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool skipBranch;
|
||||
s32 rsImm = (s32)gpr.GetImmediate32(rs);
|
||||
s32 rtImm = (s32)gpr.GetImmediate32(rt);
|
||||
s32 rsImm = (s32)gpr.GetImm(rs);
|
||||
s32 rtImm = (s32)gpr.GetImm(rt);
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
@ -176,10 +208,13 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
return;
|
||||
}
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
if (gpr.IsImmediate(rt) && gpr.GetImmediate32(rt) == 0)
|
||||
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
|
||||
{
|
||||
gpr.KillImmediate(rs, true, false);
|
||||
CMP(32, gpr.R(rs), Imm32(0));
|
||||
@ -190,6 +225,17 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
CMP(32, gpr.R(rs), gpr.R(rt));
|
||||
}
|
||||
|
||||
// We may want to try to continue along this branch a little while, to reduce reg flushing.
|
||||
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
|
||||
bool continueBranch = false;
|
||||
// Likely delay slots may change regs, can't take those branches inline safely.
|
||||
if (CanContinueBranch() && (!likely || !predictTakeBranch))
|
||||
{
|
||||
continueBranch = true;
|
||||
if (predictTakeBranch)
|
||||
cc = FlipCCFlag(cc);
|
||||
}
|
||||
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
if (!likely)
|
||||
@ -206,15 +252,32 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
ptr = J_CC(cc, true);
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
if (continueBranch && predictTakeBranch)
|
||||
{
|
||||
// We flipped the cc, the not taken case is first.
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
|
||||
// Now our taken path.
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
|
||||
if (CanContinueBranch())
|
||||
if (continueBranch && !predictTakeBranch)
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
@ -240,15 +303,11 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||
MIPSGPReg rs = _RS;
|
||||
u32 targetAddr = js.compilerPC + offset + 4;
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
|
||||
if (jo.immBranches && gpr.IsImmediate(rs) && js.numInstructions < jo.continueMaxInstructions)
|
||||
if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions)
|
||||
{
|
||||
// The cc flags are opposites: when NOT to take the branch.
|
||||
bool skipBranch;
|
||||
s32 imm = (s32)gpr.GetImmediate32(rs);
|
||||
s32 imm = (s32)gpr.GetImm(rs);
|
||||
|
||||
switch (cc)
|
||||
{
|
||||
@ -270,10 +329,8 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||
// Branch taken. Always compile the delay slot, and then go to dest.
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (andLink)
|
||||
{
|
||||
gpr.MapReg(MIPS_REG_RA, false, true);
|
||||
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8));
|
||||
}
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
@ -281,12 +338,26 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||
return;
|
||||
}
|
||||
|
||||
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
|
||||
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
|
||||
CONDITIONAL_NICE_DELAYSLOT;
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
gpr.MapReg(rs, true, false);
|
||||
CMP(32, gpr.R(rs), Imm32(0));
|
||||
|
||||
// We may want to try to continue along this branch a little while, to reduce reg flushing.
|
||||
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
|
||||
bool continueBranch = false;
|
||||
// Likely delay slots may change regs, can't take those branches inline safely.
|
||||
if (CanContinueBranch() && (!likely || !predictTakeBranch))
|
||||
{
|
||||
continueBranch = true;
|
||||
if (predictTakeBranch)
|
||||
cc = FlipCCFlag(cc);
|
||||
}
|
||||
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
if (!likely)
|
||||
@ -304,17 +375,35 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
if (continueBranch && predictTakeBranch)
|
||||
{
|
||||
// We flipped the cc, the not taken case is first.
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
|
||||
// Now our taken path.
|
||||
SetJumpTarget(ptr);
|
||||
if (andLink)
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
if (andLink)
|
||||
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
|
||||
if (CanContinueBranch())
|
||||
if (continueBranch && !predictTakeBranch)
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
@ -388,6 +477,17 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
if (!likely && delaySlotIsNice)
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
||||
// We may want to try to continue along this branch a little while, to reduce reg flushing.
|
||||
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
|
||||
bool continueBranch = false;
|
||||
// Likely delay slots may change regs, can't take those branches inline safely.
|
||||
if (CanContinueBranch() && (!likely || !predictTakeBranch))
|
||||
{
|
||||
continueBranch = true;
|
||||
if (predictTakeBranch)
|
||||
cc = FlipCCFlag(cc);
|
||||
}
|
||||
|
||||
TEST(32, M((void *)&(mips_->fpcond)), Imm32(1));
|
||||
Gen::FixupBranch ptr;
|
||||
RegCacheState state;
|
||||
@ -406,15 +506,31 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
if (continueBranch && predictTakeBranch)
|
||||
{
|
||||
// We flipped the cc, the not taken case is first.
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
WriteExit(js.compilerPC + 8, js.nextExit++);
|
||||
|
||||
// Now our taken path.
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
|
||||
|
||||
if (CanContinueBranch())
|
||||
if (continueBranch && !predictTakeBranch)
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
@ -468,6 +584,18 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
|
||||
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", js.compilerPC, (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1);
|
||||
|
||||
// We may want to try to continue along this branch a little while, to reduce reg flushing.
|
||||
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
|
||||
bool continueBranch = false;
|
||||
// Likely delay slots may change regs, can't take those branches inline safely.
|
||||
// TODO: Maybe delaySlotIsBranch could work, but let's play it safe.
|
||||
if (CanContinueBranch() && (!likely || !predictTakeBranch) && !delaySlotIsBranch)
|
||||
{
|
||||
continueBranch = true;
|
||||
if (predictTakeBranch)
|
||||
cc = FlipCCFlag(cc);
|
||||
}
|
||||
|
||||
// THE CONDITION
|
||||
int imm3 = (op >> 18) & 7;
|
||||
|
||||
@ -491,19 +619,37 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
|
||||
CompileDelaySlot(DELAYSLOT_FLUSH);
|
||||
}
|
||||
|
||||
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
|
||||
|
||||
if (continueBranch && predictTakeBranch)
|
||||
{
|
||||
// We flipped the cc, the not taken case is first.
|
||||
CONDITIONAL_LOG_EXIT(notTakenTarget);
|
||||
WriteExit(notTakenTarget, js.nextExit++);
|
||||
|
||||
// Now our taken path.
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
// Take the branch
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
|
||||
SetJumpTarget(ptr);
|
||||
// Not taken
|
||||
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
|
||||
SetJumpTarget(ptr);
|
||||
CONDITIONAL_LOG_EXIT(notTakenTarget);
|
||||
|
||||
if (CanContinueBranch() && !delaySlotIsBranch)
|
||||
if (continueBranch && !predictTakeBranch)
|
||||
{
|
||||
// Account for the delay slot.
|
||||
js.compilerPC += 4;
|
||||
if (!delaySlotIsBranch)
|
||||
js.compilerPC += 4;
|
||||
RestoreState(state);
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
@ -540,19 +686,46 @@ void Jit::Comp_Jump(MIPSOpcode op)
|
||||
u32 off = _IMM26 << 2;
|
||||
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
|
||||
|
||||
// Might be a stubbed address or something?
|
||||
if (!Memory::IsValidAddress(targetAddr))
|
||||
{
|
||||
if (js.nextExit == 0)
|
||||
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr)
|
||||
else
|
||||
js.compiling = false;
|
||||
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
|
||||
return;
|
||||
}
|
||||
|
||||
switch (op >> 26)
|
||||
{
|
||||
case 2: //j
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions)
|
||||
{
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
FlushAll();
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
break;
|
||||
|
||||
case 3: //jal
|
||||
gpr.MapReg(MIPS_REG_RA, false, true);
|
||||
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8)); // Save return address
|
||||
// Save return address - might be overwritten by delay slot.
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions)
|
||||
{
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = targetAddr - 4;
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
FlushAll();
|
||||
CONDITIONAL_LOG_EXIT(targetAddr);
|
||||
WriteExit(targetAddr, js.nextExit++);
|
||||
@ -597,19 +770,31 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||
else if (delaySlotIsNice)
|
||||
{
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
MOV(32, R(EAX), gpr.R(rs));
|
||||
|
||||
if (rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
|
||||
// According to the MIPS ABI, there are some regs we don't need to preserve.
|
||||
// Let's discard them so we don't need to write them back.
|
||||
// NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash
|
||||
// with this enabled.
|
||||
gpr.DiscardRegContentsIfCached(MIPS_REG_COMPILER_SCRATCH);
|
||||
for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++)
|
||||
gpr.DiscardRegContentsIfCached((MIPSGPReg)i);
|
||||
gpr.DiscardRegContentsIfCached(MIPS_REG_T8);
|
||||
gpr.DiscardRegContentsIfCached(MIPS_REG_T9);
|
||||
}
|
||||
|
||||
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions)
|
||||
{
|
||||
// Account for the increment in the loop.
|
||||
js.compilerPC = gpr.GetImm(rs) - 4;
|
||||
if ((op & 0x3f) == 9)
|
||||
gpr.SetImm(rd, js.compilerPC + 8);
|
||||
// In case the delay slot was a break or something.
|
||||
js.compiling = true;
|
||||
return;
|
||||
}
|
||||
|
||||
MOV(32, R(EAX), gpr.R(rs));
|
||||
FlushAll();
|
||||
}
|
||||
else
|
||||
@ -641,6 +826,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
|
||||
|
||||
void Jit::Comp_Syscall(MIPSOpcode op)
|
||||
{
|
||||
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
|
||||
FlushAll();
|
||||
|
||||
// If we're in a delay slot, this is off by one.
|
||||
|
@ -109,6 +109,17 @@ static void JitLogMiss(MIPSOpcode op)
|
||||
func(op);
|
||||
}
|
||||
|
||||
// Sets the default JIT options: block linking is enabled, while the three
// experimental features (immBranches, continueBranches, continueJumps) start
// out disabled. See the WARNING below for why they are off.
JitOptions::JitOptions()
|
||||
{
|
||||
enableBlocklink = true;
|
||||
// WARNING: These options don't work properly with cache clearing.
|
||||
// Need to find a smart way to handle before enabling.
|
||||
immBranches = false;
|
||||
continueBranches = false;
|
||||
continueJumps = false;
|
||||
// Upper bound compared against js.numInstructions before a branch or jump is
// allowed to continue compiling inline.
continueMaxInstructions = 300;
|
||||
}
|
||||
|
||||
#ifdef _MSC_VER
|
||||
// JitBlockCache doesn't use this, just stores it.
|
||||
#pragma warning(disable:4355)
|
||||
@ -524,8 +535,8 @@ Jit::JitSafeMem::JitSafeMem(Jit *jit, MIPSGPReg raddr, s32 offset, u32 alignMask
|
||||
{
|
||||
// This makes it more instructions, so let's play it safe and say we need a far jump.
|
||||
far_ = !g_Config.bIgnoreBadMemAccess || !CBreakPoints::GetMemChecks().empty();
|
||||
if (jit_->gpr.IsImmediate(raddr_))
|
||||
iaddr_ = jit_->gpr.GetImmediate32(raddr_) + offset_;
|
||||
if (jit_->gpr.IsImm(raddr_))
|
||||
iaddr_ = jit_->gpr.GetImm(raddr_) + offset_;
|
||||
else
|
||||
iaddr_ = (u32) -1;
|
||||
|
||||
@ -590,9 +601,9 @@ bool Jit::JitSafeMem::PrepareRead(OpArg &src, int size)
|
||||
|
||||
OpArg Jit::JitSafeMem::NextFastAddress(int suboffset)
|
||||
{
|
||||
if (jit_->gpr.IsImmediate(raddr_))
|
||||
if (jit_->gpr.IsImm(raddr_))
|
||||
{
|
||||
u32 addr = (jit_->gpr.GetImmediate32(raddr_) + offset_ + suboffset) & alignMask_;
|
||||
u32 addr = (jit_->gpr.GetImm(raddr_) + offset_ + suboffset) & alignMask_;
|
||||
|
||||
#ifdef _M_IX86
|
||||
return M(Memory::base + (addr & Memory::MEMVIEW32_MASK));
|
||||
@ -747,7 +758,7 @@ void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
|
||||
if (suboffset == 0)
|
||||
return;
|
||||
|
||||
if (jit_->gpr.IsImmediate(raddr_))
|
||||
if (jit_->gpr.IsImm(raddr_))
|
||||
{
|
||||
_dbg_assert_msg_(JIT, !Memory::IsValidAddress(iaddr_ + suboffset), "NextSlowRead() for an invalid immediate address?");
|
||||
|
||||
|
@ -39,19 +39,12 @@ u32 JitBreakpoint();
|
||||
|
||||
// Switches that control optional JIT behaviour. The jit reads them through
// its `jo` member (e.g. jo.continueBranches, jo.continueMaxInstructions).
struct JitOptions
|
||||
{
|
||||
JitOptions()
|
||||
{
|
||||
enableBlocklink = true;
|
||||
// WARNING: These options don't work properly with cache clearing.
|
||||
// Need to find a smart way to handle before enabling.
|
||||
immBranches = false;
|
||||
continueBranches = false;
|
||||
continueMaxInstructions = 300;
|
||||
}
|
||||
JitOptions();
|
||||
|
||||
// NOTE(review): presumably allows compiled blocks to be linked to each
// other; its use is not visible in this view - confirm.
bool enableBlocklink;
|
||||
// Gates the compile-time resolution of branches whose compared registers
// are known immediates (checked together with gpr.IsImm() in the branch
// compilers).
bool immBranches;
|
||||
// Checked by CanContinueBranch(); when false, conditional branches are
// never continued inline.
bool continueBranches;
|
||||
// Checked by the j / jal / jr paths; when set, compilation carries on at
// the jump target instead of writing an exit.
bool continueJumps;
|
||||
// Continuing is only attempted while js.numInstructions is below this.
int continueMaxInstructions;
|
||||
};
|
||||
|
||||
@ -212,12 +205,13 @@ private:
|
||||
void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
|
||||
void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
|
||||
|
||||
bool PredictTakeBranch(u32 targetAddr, bool likely);
|
||||
bool CanContinueBranch() {
|
||||
if (!jo.continueBranches || js.numInstructions >= jo.continueMaxInstructions) {
|
||||
return false;
|
||||
}
|
||||
// Need at least 2 exits left over.
|
||||
if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 1) {
|
||||
if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 2) {
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
|
@ -180,7 +180,7 @@ void GPRRegCache::DiscardRegContentsIfCached(MIPSGPReg preg) {
|
||||
}
|
||||
|
||||
|
||||
void GPRRegCache::SetImmediate32(MIPSGPReg preg, u32 immValue) {
|
||||
void GPRRegCache::SetImm(MIPSGPReg preg, u32 immValue) {
|
||||
// ZERO is always zero. Let's just make sure.
|
||||
if (preg == MIPS_REG_ZERO)
|
||||
immValue = 0;
|
||||
@ -190,15 +190,15 @@ void GPRRegCache::SetImmediate32(MIPSGPReg preg, u32 immValue) {
|
||||
regs[preg].location = Imm32(immValue);
|
||||
}
|
||||
|
||||
bool GPRRegCache::IsImmediate(MIPSGPReg preg) const {
|
||||
// Returns true when the cached location of MIPS register `preg` is an
// immediate (regs[preg].location.IsImm()). MIPS_REG_ZERO is special-cased and
// always reports true.
bool GPRRegCache::IsImm(MIPSGPReg preg) const {
|
||||
// Always say yes for ZERO, even if it's in a temp reg.
|
||||
if (preg == MIPS_REG_ZERO)
|
||||
return true;
|
||||
return regs[preg].location.IsImm();
|
||||
}
|
||||
|
||||
u32 GPRRegCache::GetImmediate32(MIPSGPReg preg) const {
|
||||
_dbg_assert_msg_(JIT, IsImmediate(preg), "Reg %d must be an immediate.", preg);
|
||||
u32 GPRRegCache::GetImm(MIPSGPReg preg) const {
|
||||
_dbg_assert_msg_(JIT, IsImm(preg), "Reg %d must be an immediate.", preg);
|
||||
// Always 0 for ZERO.
|
||||
if (preg == MIPS_REG_ZERO)
|
||||
return 0;
|
||||
|
@ -93,9 +93,9 @@ public:
|
||||
void UnlockAll();
|
||||
void UnlockAllX();
|
||||
|
||||
void SetImmediate32(MIPSGPReg preg, u32 immValue);
|
||||
bool IsImmediate(MIPSGPReg preg) const;
|
||||
u32 GetImmediate32(MIPSGPReg preg) const;
|
||||
void SetImm(MIPSGPReg preg, u32 immValue);
|
||||
bool IsImm(MIPSGPReg preg) const;
|
||||
u32 GetImm(MIPSGPReg preg) const;
|
||||
|
||||
void GetState(GPRRegCacheState &state) const;
|
||||
void RestoreState(const GPRRegCacheState state);
|
||||
|
@ -322,6 +322,7 @@ void JitCompareScreen::UpdateDisasm() {
|
||||
|
||||
// Alright. First generate the MIPS disassembly.
|
||||
|
||||
// TODO: Need a way to communicate branch continuing.
|
||||
for (u32 addr = block->originalAddress; addr <= block->originalAddress + block->originalSize * 4; addr += 4) {
|
||||
char temp[256];
|
||||
MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, true);
|
||||
|
Loading…
x
Reference in New Issue
Block a user