Merge pull request #4506 from unknownbrackets/jit-continuing

Further attempts at continuing jit blocks (disabled)
This commit is contained in:
Henrik Rydgård 2013-11-11 00:33:05 -08:00
commit 9afe69fa62
10 changed files with 440 additions and 132 deletions

View File

@ -65,6 +65,34 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions) {
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 rsImm = (s32)gpr.GetImm(rs);
s32 rtImm = (s32)gpr.GetImm(rt);
switch (cc) {
case CC_EQ: skipBranch = rsImm == rtImm; break;
case CC_NEQ: skipBranch = rsImm != rtImm; break;
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
}
if (skipBranch) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
@ -129,6 +157,38 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 imm = (s32)gpr.GetImm(rs);
switch (cc) {
case CC_GT: skipBranch = imm > 0; break;
case CC_GE: skipBranch = imm >= 0; break;
case CC_LT: skipBranch = imm < 0; break;
case CC_LE: skipBranch = imm <= 0; break;
default: skipBranch = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
if (skipBranch) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
@ -346,18 +406,42 @@ void Jit::Comp_Jump(MIPSOpcode op)
u32 off = _IMM26 << 2;
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr))
{
if (js.nextExit == 0)
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr)
else
js.compiling = false;
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
return;
}
switch (op >> 26)
{
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
case 3: //jal
gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
gpr.SetRegImm(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
@ -392,8 +476,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
if (rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
// According to the MIPS ABI, there are some regs we don't need to preserve.
// Let's discard them so we don't need to write them back.
@ -405,6 +488,20 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
gpr.DiscardR(MIPS_REG_T8);
gpr.DiscardR(MIPS_REG_T9);
}
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
if ((op & 0x3f) == 9) {
gpr.SetImm(rd, js.compilerPC + 8);
}
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
gpr.MapReg(rs);
destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
FlushAll();
} else {
// Delay slot - this case is very rare, might be able to free up R8.
@ -434,13 +531,14 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
void Jit::Comp_Syscall(MIPSOpcode op)
{
FlushAll();
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDownCount(offset);
js.downcountAmount = -offset;
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
FlushAll();
SaveDowncount();
// Skip the CallSyscall where possible.
void *quickFunc = GetQuickSyscallFunc(op);

View File

@ -64,6 +64,12 @@ ArmJitOptions::ArmJitOptions()
useBackJump = false;
useForwardJump = false;
cachePointers = true;
// WARNING: These options don't work properly with cache clearing or jit compare.
// Need to find a smart way to handle before enabling.
immBranches = false;
continueBranches = false;
continueJumps = false;
continueMaxInstructions = 300;
}
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo), fpr(mips), mips_(mips)
@ -154,6 +160,7 @@ void Jit::EatInstruction(MIPSOpcode op) {
ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.")
}
js.numInstructions++;
js.compilerPC += 4;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
@ -256,10 +263,9 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
gpr.Start(analysis);
fpr.Start(analysis);
int numInstructions = 0;
int cycles = 0;
int partialFlushOffset = 0;
js.numInstructions = 0;
while (js.compiling)
{
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
@ -270,7 +276,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
MIPSCompileOp(inst);
js.compilerPC += 4;
numInstructions++;
js.numInstructions++;
if (!cpu_info.bArmV7 && (GetCodePtr() - b->checkedEntry - partialFlushOffset) > 3200)
{
// We need to prematurely flush as we are out of range
@ -279,6 +285,14 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
SetJumpTarget(skip);
partialFlushOffset = GetCodePtr() - b->checkedEntry;
}
// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
if (GetSpaceLeft() < 0x800)
{
FlushAll();
WriteExit(js.compilerPC, js.nextExit++);
js.compiling = false;
}
}
if (jo.useForwardJump) {
@ -312,7 +326,7 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
// Don't forget to zap the newly written instructions in the instruction cache!
FlushIcache();
b->originalSize = numInstructions;
b->originalSize = js.numInstructions;
return b->normalEntry;
}

View File

@ -41,6 +41,10 @@ struct ArmJitOptions
bool useBackJump;
bool useForwardJump;
bool cachePointers;
bool immBranches;
bool continueBranches;
bool continueJumps;
int continueMaxInstructions;
};
class Jit : public ArmGen::ARMXCodeBlock

View File

@ -75,9 +75,9 @@ namespace MIPSComp
case 8: // same as addiu?
case 9: // R(rt) = R(rs) + simm; break; //addiu
{
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) + simm);
gpr.SetImm(rt, gpr.GetImm(rs) + simm);
break;
}
@ -97,9 +97,9 @@ namespace MIPSComp
case 10: // R(rt) = (s32)R(rs) < simm; break; //slti
// There's a mips compiler out there asking questions it already knows the answer to...
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
gpr.SetImmediate32(rt, (s32)gpr.GetImmediate32(rs) < simm);
gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm);
break;
}
@ -114,9 +114,9 @@ namespace MIPSComp
break;
case 11: // R(rt) = R(rs) < uimm; break; //sltiu
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) < uimm);
gpr.SetImm(rt, gpr.GetImm(rs) < uimm);
break;
}
@ -132,29 +132,29 @@ namespace MIPSComp
case 12: // R(rt) = R(rs) & uimm; break; //andi
if (uimm == 0)
gpr.SetImmediate32(rt, 0);
else if (gpr.IsImmediate(rs))
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) & uimm);
gpr.SetImm(rt, 0);
else if (gpr.IsImm(rs))
gpr.SetImm(rt, gpr.GetImm(rs) & uimm);
else
CompImmLogic(op, &XEmitter::AND);
break;
case 13: // R(rt) = R(rs) | uimm; break; //ori
if (gpr.IsImmediate(rs))
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) | uimm);
if (gpr.IsImm(rs))
gpr.SetImm(rt, gpr.GetImm(rs) | uimm);
else
CompImmLogic(op, &XEmitter::OR);
break;
case 14: // R(rt) = R(rs) ^ uimm; break; //xori
if (gpr.IsImmediate(rs))
gpr.SetImmediate32(rt, gpr.GetImmediate32(rs) ^ uimm);
if (gpr.IsImm(rs))
gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm);
else
CompImmLogic(op, &XEmitter::XOR);
break;
case 15: //R(rt) = uimm << 16; break; //lui
gpr.SetImmediate32(rt, uimm << 16);
gpr.SetImm(rt, uimm << 16);
break;
default:
@ -176,9 +176,9 @@ namespace MIPSComp
switch (op & 63)
{
case 22: //clz
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
u32 value = gpr.GetImmediate32(rs);
u32 value = gpr.GetImm(rs);
int x = 31;
int count = 0;
while (!(value & (1 << x)) && x >= 0)
@ -186,7 +186,7 @@ namespace MIPSComp
count++;
x--;
}
gpr.SetImmediate32(rd, count);
gpr.SetImm(rd, count);
}
else
{
@ -207,9 +207,9 @@ namespace MIPSComp
}
break;
case 23: //clo
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
u32 value = gpr.GetImmediate32(rs);
u32 value = gpr.GetImm(rs);
int x = 31;
int count = 0;
while ((value & (1 << x)) && x >= 0)
@ -217,7 +217,7 @@ namespace MIPSComp
count++;
x--;
}
gpr.SetImmediate32(rd, count);
gpr.SetImm(rd, count);
}
else
{
@ -277,16 +277,16 @@ namespace MIPSComp
MIPSGPReg rd = _RD;
// Yes, this happens. Let's make it fast.
if (doImm && gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt))
{
gpr.SetImmediate32(rd, doImm(gpr.GetImmediate32(rs), gpr.GetImmediate32(rt)));
gpr.SetImm(rd, doImm(gpr.GetImm(rs), gpr.GetImm(rt)));
return;
}
// Act like zero was used if the operand is equivalent. This happens.
if (gpr.IsImmediate(rs) && gpr.GetImmediate32(rs) == 0)
if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0)
rs = MIPS_REG_ZERO;
if (gpr.IsImmediate(rt) && gpr.GetImmediate32(rt) == 0)
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
rt = MIPS_REG_ZERO;
gpr.Lock(rt, rs, rd);
@ -294,7 +294,7 @@ namespace MIPSComp
if (rt == MIPS_REG_ZERO || (rs == MIPS_REG_ZERO && doImm != &RType3_ImmSub))
{
if (doImm == &RType3_ImmAnd)
gpr.SetImmediate32(rd, 0);
gpr.SetImm(rd, 0);
else
{
MIPSGPReg rsource = rt == MIPS_REG_ZERO ? rs : rt;
@ -305,10 +305,10 @@ namespace MIPSComp
}
}
}
else if (gpr.IsImmediate(rt))
else if (gpr.IsImm(rt))
{
// No temporary needed.
u32 rtval = gpr.GetImmediate32(rt);
u32 rtval = gpr.GetImm(rt);
gpr.MapReg(rd, rs == rd, true);
if (rs != rd)
MOV(32, gpr.R(rd), gpr.R(rs));
@ -345,7 +345,7 @@ namespace MIPSComp
if (rd == rs)
break;
gpr.Lock(rt, rs, rd);
if (!gpr.IsImmediate(rt))
if (!gpr.IsImm(rt))
{
gpr.KillImmediate(rs, true, false);
// Need to load rd in case the condition fails.
@ -353,11 +353,11 @@ namespace MIPSComp
CMP(32, gpr.R(rt), Imm32(0));
CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_E);
}
else if (gpr.GetImmediate32(rt) == 0)
else if (gpr.GetImm(rt) == 0)
{
// Yes, this actually happens.
if (gpr.IsImmediate(rs))
gpr.SetImmediate32(rd, gpr.GetImmediate32(rs));
if (gpr.IsImm(rs))
gpr.SetImm(rd, gpr.GetImm(rs));
else if (rd != rs)
{
gpr.MapReg(rd, false, true);
@ -371,7 +371,7 @@ namespace MIPSComp
if (rd == rs)
break;
gpr.Lock(rt, rs, rd);
if (!gpr.IsImmediate(rt))
if (!gpr.IsImm(rt))
{
gpr.KillImmediate(rs, true, false);
// Need to load rd in case the condition fails.
@ -379,10 +379,10 @@ namespace MIPSComp
CMP(32, gpr.R(rt), Imm32(0));
CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_NE);
}
else if (gpr.GetImmediate32(rt) != 0)
else if (gpr.GetImm(rt) != 0)
{
if (gpr.IsImmediate(rs))
gpr.SetImmediate32(rd, gpr.GetImmediate32(rs));
if (gpr.IsImm(rs))
gpr.SetImm(rd, gpr.GetImm(rs));
else if (rd != rs)
{
gpr.MapReg(rd, false, true);
@ -412,15 +412,15 @@ namespace MIPSComp
case 39: // R(rd) = ~(R(rs) | R(rt)); //nor
CompTriArith(op, &XEmitter::OR, &RType3_ImmOr);
if (gpr.IsImmediate(rd))
gpr.SetImmediate32(rd, ~gpr.GetImmediate32(rd));
if (gpr.IsImm(rd))
gpr.SetImm(rd, ~gpr.GetImm(rd));
else
NOT(32, gpr.R(rd));
break;
case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
gpr.SetImmediate32(rd, (s32)gpr.GetImmediate32(rs) < (s32)gpr.GetImmediate32(rt));
if (gpr.IsImm(rs) && gpr.IsImm(rt))
gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt));
else
{
gpr.Lock(rt, rs, rd);
@ -435,8 +435,8 @@ namespace MIPSComp
break;
case 43: //R(rd) = R(rs) < R(rt); break; //sltu
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
gpr.SetImmediate32(rd, gpr.GetImmediate32(rs) < gpr.GetImmediate32(rt));
if (gpr.IsImm(rs) && gpr.IsImm(rt))
gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt));
else
{
gpr.Lock(rd, rs, rt);
@ -451,8 +451,8 @@ namespace MIPSComp
break;
case 44: //R(rd) = (R(rs) > R(rt)) ? R(rs) : R(rt); break; //max
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
gpr.SetImmediate32(rd, std::max((s32)gpr.GetImmediate32(rs), (s32)gpr.GetImmediate32(rt)));
if (gpr.IsImm(rs) && gpr.IsImm(rt))
gpr.SetImm(rd, std::max((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));
else
{
MIPSGPReg rsrc = rd == rt ? rs : rt;
@ -468,8 +468,8 @@ namespace MIPSComp
break;
case 45: //R(rd) = (R(rs) < R(rt)) ? R(rs) : R(rt); break; //min
if (gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
gpr.SetImmediate32(rd, std::min((s32)gpr.GetImmediate32(rs), (s32)gpr.GetImmediate32(rt)));
if (gpr.IsImm(rs) && gpr.IsImm(rt))
gpr.SetImm(rd, std::min((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));
else
{
MIPSGPReg rsrc = rd == rt ? rs : rt;
@ -517,9 +517,9 @@ namespace MIPSComp
MIPSGPReg rt = _RT;
int sa = _SA;
if (doImm && gpr.IsImmediate(rt))
if (doImm && gpr.IsImm(rt))
{
gpr.SetImmediate32(rd, doImm(gpr.GetImmediate32(rt), sa));
gpr.SetImm(rd, doImm(gpr.GetImm(rt), sa));
return;
}
@ -538,16 +538,16 @@ namespace MIPSComp
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
if (doImm && gpr.IsImmediate(rs) && gpr.IsImmediate(rt))
if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt))
{
gpr.SetImmediate32(rd, doImm(gpr.GetImmediate32(rt), gpr.GetImmediate32(rs)));
gpr.SetImm(rd, doImm(gpr.GetImm(rt), gpr.GetImm(rs)));
return;
}
gpr.Lock(rd, rt, rs);
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
int sa = gpr.GetImmediate32(rs);
int sa = gpr.GetImm(rs);
gpr.MapReg(rd, rd == rt, true);
if (rd != rt)
MOV(32, gpr.R(rd), gpr.R(rt));
@ -612,9 +612,9 @@ namespace MIPSComp
switch (op & 0x3f)
{
case 0x0: //ext
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
gpr.SetImmediate32(rt, (gpr.GetImmediate32(rs) >> pos) & mask);
gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);
return;
}
@ -631,12 +631,12 @@ namespace MIPSComp
{
u32 sourcemask = mask >> pos;
u32 destmask = ~(sourcemask << pos);
if (gpr.IsImmediate(rs))
if (gpr.IsImm(rs))
{
u32 inserted = (gpr.GetImmediate32(rs) & sourcemask) << pos;
if (gpr.IsImmediate(rt))
u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;
if (gpr.IsImm(rt))
{
gpr.SetImmediate32(rt, (gpr.GetImmediate32(rt) & destmask) | inserted);
gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);
return;
}
@ -675,9 +675,9 @@ namespace MIPSComp
switch ((op >> 6) & 31)
{
case 16: // seb // R(rd) = (u32)(s32)(s8)(u8)R(rt);
if (gpr.IsImmediate(rt))
if (gpr.IsImm(rt))
{
gpr.SetImmediate32(rd, (u32)(s32)(s8)(u8)gpr.GetImmediate32(rt));
gpr.SetImm(rd, (u32)(s32)(s8)(u8)gpr.GetImm(rt));
break;
}
@ -700,10 +700,10 @@ namespace MIPSComp
break;
case 20: //bitrev
if (gpr.IsImmediate(rt))
if (gpr.IsImm(rt))
{
// http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel
u32 v = gpr.GetImmediate32(rt);
u32 v = gpr.GetImm(rt);
// swap odd and even bits
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
// swap consecutive pairs
@ -714,7 +714,7 @@ namespace MIPSComp
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
// swap 2-byte long pairs
v = ( v >> 16 ) | ( v << 16);
gpr.SetImmediate32(rd, v);
gpr.SetImm(rd, v);
break;
}
@ -755,9 +755,9 @@ namespace MIPSComp
break;
case 24: // seh // R(rd) = (u32)(s32)(s16)(u16)R(rt);
if (gpr.IsImmediate(rt))
if (gpr.IsImm(rt))
{
gpr.SetImmediate32(rd, (u32)(s32)(s16)(u16)gpr.GetImmediate32(rt));
gpr.SetImm(rd, (u32)(s32)(s16)(u16)gpr.GetImm(rt));
break;
}
@ -785,9 +785,9 @@ namespace MIPSComp
switch (op & 0x3ff)
{
case 0xA0: //wsbh
if (gpr.IsImmediate(rt)) {
u32 rtImm = gpr.GetImmediate32(rt);
gpr.SetImmediate32(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));
if (gpr.IsImm(rt)) {
u32 rtImm = gpr.GetImm(rt);
gpr.SetImm(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));
break;
}
gpr.Lock(rd, rt);
@ -800,8 +800,8 @@ namespace MIPSComp
gpr.UnlockAll();
break;
case 0xE0: //wsbw
if (gpr.IsImmediate(rt)) {
gpr.SetImmediate32(rd, swap32(gpr.GetImmediate32(rt)));
if (gpr.IsImm(rt)) {
gpr.SetImm(rd, swap32(gpr.GetImm(rt)));
break;
}
gpr.Lock(rd, rt);

View File

@ -129,6 +129,42 @@ void Jit::BranchLogExit(MIPSOpcode op, u32 dest, bool useEAX)
SetJumpTarget(skip);
}
// Returns the negated counterpart of a condition flag (e.g. CC_Z <-> CC_NZ,
// CC_L <-> CC_NL), so a conditional jump can be emitted for the opposite
// outcome of the same comparison.
//
// Any value not covered by the pairs below is reported as an error and CC_O
// is returned as a fallback.
static CCFlags FlipCCFlag(CCFlags flag)
{
	switch (flag)
	{
	case CC_O: return CC_NO;
	case CC_NO: return CC_O;
	case CC_B: return CC_NB;
	case CC_NB: return CC_B;
	case CC_Z: return CC_NZ;
	case CC_NZ: return CC_Z;
	case CC_BE: return CC_NBE;
	case CC_NBE: return CC_BE;
	case CC_S: return CC_NS;
	case CC_NS: return CC_S;
	case CC_P: return CC_NP;
	case CC_NP: return CC_P;
	case CC_L: return CC_NL;
	case CC_NL: return CC_L;
	case CC_LE: return CC_NLE;
	case CC_NLE: return CC_LE;
	}
	// The format string must directly follow the log type.  A stray `false` here
	// (as used by the _dbg_assert_msg_ condition argument) would be taken as the
	// format string and the real message would be lost.
	ERROR_LOG_REPORT(JIT, "FlipCCFlag: Unexpected CC flag: %d", flag);
	return CC_O;
}
// Static guess at whether the branch to targetAddr will be taken.
//   targetAddr - destination address of the branch.
//   likely     - true for "likely" branch variants, which are always predicted taken.
// Returns true when the branch is predicted to be taken.
bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
	// TODO: Conventional prediction would favor branches going upward to lower
	// addresses.  As of this comment's writing that gives worse performance;
	// the reverse check (forward targets) generally performs the same or better.
	const bool targetIsAhead = targetAddr > js.compilerPC;
	return likely || targetIsAhead;
}
void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
{
CONDITIONAL_LOG;
@ -141,16 +177,12 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (jo.immBranches && gpr.IsImmediate(rs) && gpr.IsImmediate(rt) && js.numInstructions < jo.continueMaxInstructions)
if (jo.immBranches && gpr.IsImm(rs) && gpr.IsImm(rt) && js.numInstructions < jo.continueMaxInstructions)
{
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 rsImm = (s32)gpr.GetImmediate32(rs);
s32 rtImm = (s32)gpr.GetImmediate32(rt);
s32 rsImm = (s32)gpr.GetImm(rs);
s32 rtImm = (s32)gpr.GetImm(rt);
switch (cc)
{
@ -176,10 +208,13 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImmediate(rt) && gpr.GetImmediate32(rt) == 0)
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
gpr.KillImmediate(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
@ -190,6 +225,17 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
CMP(32, gpr.R(rs), gpr.R(rt));
}
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
if (CanContinueBranch() && (!likely || !predictTakeBranch))
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
@ -206,15 +252,32 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
ptr = J_CC(cc, true);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
if (CanContinueBranch())
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
@ -240,15 +303,11 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (jo.immBranches && gpr.IsImmediate(rs) && js.numInstructions < jo.continueMaxInstructions)
if (jo.immBranches && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions)
{
// The cc flags are opposites: when NOT to take the branch.
bool skipBranch;
s32 imm = (s32)gpr.GetImmediate32(rs);
s32 imm = (s32)gpr.GetImm(rs);
switch (cc)
{
@ -270,10 +329,8 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
{
gpr.MapReg(MIPS_REG_RA, false, true);
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8));
}
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -281,12 +338,26 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
return;
}
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
if (CanContinueBranch() && (!likely || !predictTakeBranch))
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
Gen::FixupBranch ptr;
RegCacheState state;
if (!likely)
@ -304,17 +375,35 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
if (andLink)
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
if (CanContinueBranch())
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
@ -388,6 +477,17 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
if (CanContinueBranch() && (!likely || !predictTakeBranch))
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
TEST(32, M((void *)&(mips_->fpcond)), Imm32(1));
Gen::FixupBranch ptr;
RegCacheState state;
@ -406,15 +506,31 @@ void Jit::BranchFPFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
WriteExit(js.compilerPC + 8, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(js.compilerPC + 8);
if (CanContinueBranch())
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
@ -468,6 +584,18 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
if (delaySlotIsBranch && (signed short)(delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target %d / %d", js.compilerPC, (signed short)(delaySlotOp & 0xFFFF), (signed short)(op & 0xFFFF) - 1);
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
bool continueBranch = false;
// Likely delay slots may change regs, can't take those branches inline safely.
// TODO: Maybe delaySlotIsBranch could work, but let's play it safe.
if (CanContinueBranch() && (!likely || !predictTakeBranch) && !delaySlotIsBranch)
{
continueBranch = true;
if (predictTakeBranch)
cc = FlipCCFlag(cc);
}
// THE CONDITION
int imm3 = (op >> 18) & 7;
@ -491,19 +619,37 @@ void Jit::BranchVFPUFlag(MIPSOpcode op, Gen::CCFlags cc, bool likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
if (continueBranch && predictTakeBranch)
{
// We flipped the cc, the not taken case is first.
CONDITIONAL_LOG_EXIT(notTakenTarget);
WriteExit(notTakenTarget, js.nextExit++);
// Now our taken path.
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
// Take the branch
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
SetJumpTarget(ptr);
CONDITIONAL_LOG_EXIT(notTakenTarget);
if (CanContinueBranch() && !delaySlotIsBranch)
if (continueBranch && !predictTakeBranch)
{
// Account for the delay slot.
js.compilerPC += 4;
if (!delaySlotIsBranch)
js.compilerPC += 4;
RestoreState(state);
// In case the delay slot was a break or something.
js.compiling = true;
@ -540,19 +686,46 @@ void Jit::Comp_Jump(MIPSOpcode op)
u32 off = _IMM26 << 2;
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr))
{
if (js.nextExit == 0)
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr)
else
js.compiling = false;
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
return;
}
switch (op >> 26)
{
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions)
{
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
break;
case 3: //jal
gpr.MapReg(MIPS_REG_RA, false, true);
MOV(32, gpr.R(MIPS_REG_RA), Imm32(js.compilerPC + 8)); // Save return address
// Save return address - might be overwritten by delay slot.
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions)
{
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
FlushAll();
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -597,19 +770,31 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
else if (delaySlotIsNice)
{
CompileDelaySlot(DELAYSLOT_NICE);
MOV(32, R(EAX), gpr.R(rs));
if (rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
// According to the MIPS ABI, there are some regs we don't need to preserve.
// Let's discard them so we don't need to write them back.
// NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash
// with this enabled.
gpr.DiscardRegContentsIfCached(MIPS_REG_COMPILER_SCRATCH);
for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++)
gpr.DiscardRegContentsIfCached((MIPSGPReg)i);
gpr.DiscardRegContentsIfCached(MIPS_REG_T8);
gpr.DiscardRegContentsIfCached(MIPS_REG_T9);
}
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions)
{
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
if ((op & 0x3f) == 9)
gpr.SetImm(rd, js.compilerPC + 8);
// In case the delay slot was a break or something.
js.compiling = true;
return;
}
MOV(32, R(EAX), gpr.R(rs));
FlushAll();
}
else
@ -641,6 +826,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
void Jit::Comp_Syscall(MIPSOpcode op)
{
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
FlushAll();
// If we're in a delay slot, this is off by one.

View File

@ -109,6 +109,17 @@ static void JitLogMiss(MIPSOpcode op)
func(op);
}
// Default JIT options.
//
// WARNING: immBranches, continueBranches and continueJumps don't work properly
// with cache clearing.  Need to find a smart way to handle that before enabling
// them, so they all default to off.
JitOptions::JitOptions()
	: enableBlocklink(true),
	  immBranches(false),
	  continueBranches(false),
	  continueJumps(false),
	  continueMaxInstructions(300)
{
}
#ifdef _MSC_VER
// JitBlockCache doesn't use this, just stores it.
#pragma warning(disable:4355)
@ -524,8 +535,8 @@ Jit::JitSafeMem::JitSafeMem(Jit *jit, MIPSGPReg raddr, s32 offset, u32 alignMask
{
// This makes it more instructions, so let's play it safe and say we need a far jump.
far_ = !g_Config.bIgnoreBadMemAccess || !CBreakPoints::GetMemChecks().empty();
if (jit_->gpr.IsImmediate(raddr_))
iaddr_ = jit_->gpr.GetImmediate32(raddr_) + offset_;
if (jit_->gpr.IsImm(raddr_))
iaddr_ = jit_->gpr.GetImm(raddr_) + offset_;
else
iaddr_ = (u32) -1;
@ -590,9 +601,9 @@ bool Jit::JitSafeMem::PrepareRead(OpArg &src, int size)
OpArg Jit::JitSafeMem::NextFastAddress(int suboffset)
{
if (jit_->gpr.IsImmediate(raddr_))
if (jit_->gpr.IsImm(raddr_))
{
u32 addr = (jit_->gpr.GetImmediate32(raddr_) + offset_ + suboffset) & alignMask_;
u32 addr = (jit_->gpr.GetImm(raddr_) + offset_ + suboffset) & alignMask_;
#ifdef _M_IX86
return M(Memory::base + (addr & Memory::MEMVIEW32_MASK));
@ -747,7 +758,7 @@ void Jit::JitSafeMem::NextSlowRead(void *safeFunc, int suboffset)
if (suboffset == 0)
return;
if (jit_->gpr.IsImmediate(raddr_))
if (jit_->gpr.IsImm(raddr_))
{
_dbg_assert_msg_(JIT, !Memory::IsValidAddress(iaddr_ + suboffset), "NextSlowRead() for an invalid immediate address?");

View File

@ -39,19 +39,12 @@ u32 JitBreakpoint();
struct JitOptions
{
JitOptions()
{
enableBlocklink = true;
// WARNING: These options don't work properly with cache clearing.
// Need to find a smart way to handle before enabling.
immBranches = false;
continueBranches = false;
continueMaxInstructions = 300;
}
JitOptions();
bool enableBlocklink;
bool immBranches;
bool continueBranches;
bool continueJumps;
int continueMaxInstructions;
};
@ -212,12 +205,13 @@ private:
void CallProtectedFunction(void *func, const u32 arg1, const u32 arg2, const u32 arg3);
void CallProtectedFunction(void *func, const OpArg &arg1, const u32 arg2, const u32 arg3);
bool PredictTakeBranch(u32 targetAddr, bool likely);
bool CanContinueBranch() {
if (!jo.continueBranches || js.numInstructions >= jo.continueMaxInstructions) {
return false;
}
// Need at least 2 exits left over.
if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 1) {
if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 2) {
return false;
}
return true;

View File

@ -180,7 +180,7 @@ void GPRRegCache::DiscardRegContentsIfCached(MIPSGPReg preg) {
}
void GPRRegCache::SetImmediate32(MIPSGPReg preg, u32 immValue) {
void GPRRegCache::SetImm(MIPSGPReg preg, u32 immValue) {
// ZERO is always zero. Let's just make sure.
if (preg == MIPS_REG_ZERO)
immValue = 0;
@ -190,15 +190,15 @@ void GPRRegCache::SetImmediate32(MIPSGPReg preg, u32 immValue) {
regs[preg].location = Imm32(immValue);
}
bool GPRRegCache::IsImmediate(MIPSGPReg preg) const {
bool GPRRegCache::IsImm(MIPSGPReg preg) const {
// Always say yes for ZERO, even if it's in a temp reg.
if (preg == MIPS_REG_ZERO)
return true;
return regs[preg].location.IsImm();
}
u32 GPRRegCache::GetImmediate32(MIPSGPReg preg) const {
_dbg_assert_msg_(JIT, IsImmediate(preg), "Reg %d must be an immediate.", preg);
u32 GPRRegCache::GetImm(MIPSGPReg preg) const {
_dbg_assert_msg_(JIT, IsImm(preg), "Reg %d must be an immediate.", preg);
// Always 0 for ZERO.
if (preg == MIPS_REG_ZERO)
return 0;

View File

@ -93,9 +93,9 @@ public:
void UnlockAll();
void UnlockAllX();
void SetImmediate32(MIPSGPReg preg, u32 immValue);
bool IsImmediate(MIPSGPReg preg) const;
u32 GetImmediate32(MIPSGPReg preg) const;
void SetImm(MIPSGPReg preg, u32 immValue);
bool IsImm(MIPSGPReg preg) const;
u32 GetImm(MIPSGPReg preg) const;
void GetState(GPRRegCacheState &state) const;
void RestoreState(const GPRRegCacheState state);

View File

@ -322,6 +322,7 @@ void JitCompareScreen::UpdateDisasm() {
// Alright. First generate the MIPS disassembly.
// TODO: Need a way to communicate branch continuing.
for (u32 addr = block->originalAddress; addr <= block->originalAddress + block->originalSize * 4; addr += 4) {
char temp[256];
MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, true);