Merge pull request #8319 from unknownbrackets/jit-branch

Jit: Always link RA, even if branch not taken
This commit is contained in:
Henrik Rydgård 2015-12-28 19:47:14 +01:00
commit d70125f953
4 changed files with 26 additions and 25 deletions

View File

@ -204,15 +204,17 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
@ -228,7 +230,7 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken && andLink)
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
@ -244,6 +246,9 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
ArmGen::FixupBranch ptr;
if (!likely)
{
@ -261,12 +266,6 @@ void ArmJit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool like
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCHREG1, GetCompilerPC() + 8);
STR(SCRATCHREG1, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);

View File

@ -192,15 +192,17 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
if (!immBranchTaken) {
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
@ -216,7 +218,7 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
if (immBranch) {
// Continuing is handled above, this is just static jumping.
if (immBranchTaken && andLink)
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (immBranchTaken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
@ -233,6 +235,9 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
gpr.MapReg(rs);
CMP(gpr.R(rs), 0);
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
Arm64Gen::FixupBranch ptr;
if (!likely)
{
@ -250,12 +255,6 @@ void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool li
}
// Take the branch
if (andLink)
{
gpr.SetRegImm(SCRATCH1, GetCompilerPC() + 8);
STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);

View File

@ -503,6 +503,7 @@ namespace MIPSAnalyst {
}
bool IsDelaySlotNiceReg(MIPSOpcode branchOp, MIPSOpcode op, MIPSGPReg reg1, MIPSGPReg reg2) {
MIPSInfo branchInfo = MIPSGetInfo(branchOp);
MIPSInfo info = MIPSGetInfo(op);
if (info & IS_CONDBRANCH) {
return false;
@ -514,6 +515,10 @@ namespace MIPSAnalyst {
if (reg2 != MIPS_REG_ZERO && GetOutGPReg(op) == reg2) {
return false;
}
// If the branch is an "and link" branch, check the delay slot for RA.
if ((branchInfo & OUT_RA) != 0) {
return GetOutGPReg(op) != MIPS_REG_RA && !ReadsFromGPReg(op, MIPS_REG_RA);
}
return true;
}

View File

@ -196,6 +196,9 @@ bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
}
void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
// We may want to try to continue along this branch a little while, to reduce reg flushing.
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
if (CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr))
@ -237,9 +240,6 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
RestoreState(state);
CONDITIONAL_LOG_EXIT(targetAddr);
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
// Don't forget to run the delay slot if likely.
if (likely)
CompileDelaySlot(DELAYSLOT_NICE);
@ -253,8 +253,6 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
else
{
// Take the branch
if (andLink)
MOV(32, gpr.GetDefaultLocation(MIPS_REG_RA), Imm32(GetCompilerPC() + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -288,8 +286,6 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
}
// Take the branch
if (andLink)
MOV(32, gpr.GetDefaultLocation(MIPS_REG_RA), Imm32(GetCompilerPC() + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -303,7 +299,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
void Jit::CompBranchExit(bool taken, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
// Continuing is handled in the imm branch case... TODO: move it here?
if (taken && andLink)
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (taken || !likely)
CompileDelaySlot(DELAYSLOT_FLUSH);
@ -427,15 +423,17 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
if (!immBranchTaken)
{
// Skip the delay slot if likely, otherwise it'll be the next instruction.
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
if (likely)
js.compilerPC += 4;
return;
}
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
if (andLink)
gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.