jit: Statically jump for fixed branches.

This handles both loops (where the first step is known) and static branches
(some code uses them instead of jumps, and we disassemble those to "b").

Not likely to be a big improvement, but it might help in cases where the
branch predictor would have guessed wrong.

This is in contrast to continuing past the branch, which would build a larger jit block.
Unknown W. Brackets 2014-10-12 12:37:54 -07:00
parent 9228ac72da
commit 2f598e8f38
3 changed files with 156 additions and 94 deletions
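As a rough illustration of what the commit message describes (a standalone sketch with invented names and addresses, not the PPSSPP code below): when both operands of a conditional branch are known immediates at JIT time, the comparison can be resolved during compilation, so the block can end with a single static exit to the taken or not-taken address instead of a compare plus a conditional exit.

#include <cstdint>
#include <iostream>
#include <optional>

// Mirrors the "cc flags are opposites" convention in the diff: the flag
// records when NOT to take the branch (e.g. BEQ compiles with "not equal").
enum class SkipWhen { NotEqual, Equal };

// Hypothetical helper: returns the fixed exit address when the branch outcome
// is known at compile time, or std::nullopt when a normal conditional exit
// has to be emitted.
std::optional<uint32_t> StaticBranchTarget(bool rsIsImm, int32_t rsImm,
                                           bool rtIsImm, int32_t rtImm,
                                           SkipWhen cc,
                                           uint32_t targetAddr, uint32_t pc) {
	if (!rsIsImm || !rtIsImm)
		return std::nullopt;
	bool notTaken = (cc == SkipWhen::NotEqual) ? (rsImm != rtImm) : (rsImm == rtImm);
	// Taken: exit straight to the target. Not taken: exit past the branch and
	// its delay slot (two 4-byte MIPS instructions).
	return notTaken ? pc + 8 : targetAddr;
}

int main() {
	// BEQ where both registers are known to hold 5: always taken, so the JIT
	// can end the block with one unconditional exit to the (made-up) target.
	if (auto dest = StaticBranchTarget(true, 5, true, 5, SkipWhen::NotEqual,
	                                   0x08804100, 0x08804000)) {
		std::cout << "static exit to 0x" << std::hex << *dest << "\n";
	}
}

In the diff, this decision is the immBranch / immBranchTaken logic: if jo.immBranches is enabled and the block is still short, the compiler keeps going ("continuing"); otherwise a known-outcome branch takes the new static WriteExit path in both the ARM and x86 backends.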

View File

@@ -67,9 +67,10 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
bool immBranch = false;
bool immBranchNotTaken = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
s32 rtImm = (s32)gpr.GetImm(rt);
@@ -80,10 +81,11 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (immBranchNotTaken) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
@@ -102,53 +104,65 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
// We might be able to flip the condition (EQ/NEQ are easy.)
const bool canFlip = cc == CC_EQ || cc == CC_NEQ;
Operand2 op2;
bool negated;
if (gpr.IsImm(rt) && TryMakeOperand2_AllowNegation(gpr.GetImm(rt), op2, &negated)) {
gpr.MapReg(rs);
if (!negated)
CMP(gpr.R(rs), op2);
else
CMN(gpr.R(rs), op2);
} else {
if (gpr.IsImm(rs) && TryMakeOperand2_AllowNegation(gpr.GetImm(rs), op2, &negated) && canFlip) {
gpr.MapReg(rt);
if (!negated)
CMP(gpr.R(rt), op2);
else
CMN(gpr.R(rt), op2);
} else {
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
}
}
ArmGen::FixupBranch ptr;
if (!likely) {
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8;
WriteExit(destAddr, js.nextExit++);
} else {
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
// We might be able to flip the condition (EQ/NEQ are easy.)
const bool canFlip = cc == CC_EQ || cc == CC_NEQ;
Operand2 op2;
bool negated;
if (gpr.IsImm(rt) && TryMakeOperand2_AllowNegation(gpr.GetImm(rt), op2, &negated)) {
gpr.MapReg(rs);
if (!negated)
CMP(gpr.R(rs), op2);
else
CMN(gpr.R(rs), op2);
} else {
if (gpr.IsImm(rs) && TryMakeOperand2_AllowNegation(gpr.GetImm(rs), op2, &negated) && canFlip) {
gpr.MapReg(rt);
if (!negated)
CMP(gpr.R(rt), op2);
else
CMN(gpr.R(rt), op2);
} else {
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
}
}
ArmGen::FixupBranch ptr;
if (!likely) {
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
} else {
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC+8, js.nextExit++);
js.compiling = false;
}
@@ -164,9 +178,10 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
bool immBranch = false;
bool immBranchNotTaken = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
switch (cc)
@@ -178,10 +193,11 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool likely)
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (immBranchNotTaken) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
js.compilerPC += 4;
@@ -203,40 +219,54 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool likely)
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken && andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCHREG1, js.compilerPC + 8);
STR(SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
const u32 destAddr = immBranchTaken ? targetAddr : js.compilerPC + 8;
WriteExit(destAddr, js.nextExit++);
} else {
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCHREG1, js.compilerPC + 8);
STR(SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}

View File

@@ -266,6 +266,21 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink)
}
}
void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
// Continuing is handled in the imm branch case... TODO: move it here?
if (taken && andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
if (taken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
else
FlushAll();
const u32 destAddr = taken ? targetAddr : notTakenAddr;
CONDITIONAL_LOG_EXIT(destAddr);
WriteExit(destAddr, js.nextExit++);
js.compiling = false;
}
void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
{
CONDITIONAL_LOG;
@@ -279,9 +294,10 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
bool immBranch = false;
bool immBranchNotTaken = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 rsImm = (s32)gpr.GetImm(rs);
s32 rtImm = (s32)gpr.GetImm(rt);
@@ -292,11 +308,12 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSRTComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions)
{
if (immBranchNotTaken)
if (!immBranchTaken)
{
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
@@ -316,21 +333,27 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
gpr.KillImmediate(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
}
if (immBranch)
CompBranchExit(immBranchTaken, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, false);
else
{
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), gpr.R(rt));
}
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, false);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
gpr.KillImmediate(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
}
else
{
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), gpr.R(rt));
}
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, false);
}
}
void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool likely)
@@ -345,9 +368,10 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool likely)
u32 targetAddr = js.compilerPC + offset + 4;
bool immBranch = false;
bool immBranchNotTaken = false;
bool immBranchTaken = false;
if (gpr.IsImm(rs)) {
// The cc flags are opposites: when NOT to take the branch.
bool immBranchNotTaken;
s32 imm = (s32)gpr.GetImm(rs);
switch (cc)
@@ -359,11 +383,12 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool likely)
default: immBranchNotTaken = false; _dbg_assert_msg_(JIT, false, "Bad cc flag in BranchRSZeroComp().");
}
immBranch = true;
immBranchTaken = !immBranchNotTaken;
}
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions)
{
if (immBranchNotTaken)
if (!immBranchTaken)
{
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (likely)
@@ -386,13 +411,19 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool likely)
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
if (immBranch)
CompBranchExit(immBranchTaken, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, false);
else
{
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, andLink);
gpr.MapReg(rs, true, false);
CMP(32, gpr.R(rs), Imm32(0));
CompBranchExits(cc, targetAddr, js.compilerPC + 8, delaySlotIsNice, likely, andLink);
}
}

View File

@@ -228,6 +228,7 @@ private:
void CompITypeMemUnpairedLR(MIPSOpcode op, bool isStore);
void CompITypeMemUnpairedLRInner(MIPSOpcode op, X64Reg shiftReg);
void CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink);
void CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink);
void CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters);
void CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN = false);