ppsspp/Core/MIPS/ARM/ArmCompBranch.cpp
2013-08-24 19:41:10 -07:00

449 lines
12 KiB
C++

// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "Core/Reporting.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/HLETables.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/ARM/ArmJit.h"
#include "Core/MIPS/ARM/ArmRegCache.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Common/ArmEmitter.h"
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
#define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT ;
using namespace MIPSAnalyst;
namespace MIPSComp
{
void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
MIPSGPReg rt = _RT;
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
}
else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0 && (cc == CC_EQ || cc == CC_NEQ)) // only these are easily 'flippable'
{
gpr.MapReg(rt);
CMP(gpr.R(rt), Operand2(0, TYPE_IMM));
}
else
{
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
}
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC+8, js.nextExit++);
js.compiling = false;
}
void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
MIPSGPReg rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMP(gpr.R(rs), Operand2(0, TYPE_IMM));
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
MOVI2R(R0, js.compilerPC + 8);
STR(R0, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
void Jit::Comp_RelBranch(MIPSOpcode op)
{
// The CC flags here should be opposite of the actual branch becuase they skip the branching action.
switch (op>>26)
{
case 4: BranchRSRTComp(op, CC_NEQ, false); break;//beq
case 5: BranchRSRTComp(op, CC_EQ, false); break;//bne
case 6: BranchRSZeroComp(op, CC_GT, false, false); break;//blez
case 7: BranchRSZeroComp(op, CC_LE, false, false); break;//bgtz
case 20: BranchRSRTComp(op, CC_NEQ, true); break;//beql
case 21: BranchRSRTComp(op, CC_EQ, true); break;//bnel
case 22: BranchRSZeroComp(op, CC_GT, false, true); break;//blezl
case 23: BranchRSZeroComp(op, CC_LE, false, true); break;//bgtzl
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
}
void Jit::Comp_RelBranchRI(MIPSOpcode op)
{
switch ((op >> 16) & 0x1F)
{
case 0: BranchRSZeroComp(op, CC_GE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
case 1: BranchRSZeroComp(op, CC_LT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
case 2: BranchRSZeroComp(op, CC_GE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
case 3: BranchRSZeroComp(op, CC_LT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
case 16: BranchRSZeroComp(op, CC_GE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal
case 17: BranchRSZeroComp(op, CC_LT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
case 18: BranchRSZeroComp(op, CC_GE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
case 19: BranchRSZeroComp(op, CC_LT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
}
// If likely is set, discard the branch slot if NOT taken.
void Jit::BranchFPFlag(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
LDR(R0, CTXREG, offsetof(MIPSState, fpcond));
TST(R0, Operand2(1, TYPE_IMM));
ArmGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, js.nextExit++);
js.compiling = false;
}
void Jit::Comp_FPUBranch(MIPSOpcode op)
{
switch((op >> 16) & 0x1f)
{
case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f
case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t
case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl
case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
}
// If likely is set, discard the branch slot if NOT taken.
void Jit::BranchVFPUFlag(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = _IMM16 << 2;
u32 targetAddr = js.compilerPC + offset + 4;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
// However, it does consistently try each branch, which these games seem to expect.
bool delaySlotIsBranch = MIPSCodeUtils::IsVFPUBranch(delaySlotOp);
bool delaySlotIsNice = !delaySlotIsBranch && IsDelaySlotNiceVFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (delaySlotIsBranch && (delaySlotOp & 0xFFFF) != (signed short)(op & 0xFFFF) - 1)
ERROR_LOG_REPORT(JIT, "VFPU branch in VFPU delay slot at %08x with different target", js.compilerPC);
int imm3 = (op >> 18) & 7;
MOVI2R(R0, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
LDR(R0, R0, Operand2(0, TYPE_IMM));
TST(R0, Operand2(1 << imm3, TYPE_IMM));
ArmGen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
if (!delaySlotIsNice && !delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_CC(cc);
}
else
{
FlushAll();
ptr = B_CC(cc);
if (!delaySlotIsBranch)
CompileDelaySlot(DELAYSLOT_FLUSH);
}
js.inDelaySlot = false;
// Take the branch
WriteExit(targetAddr, js.nextExit++);
SetJumpTarget(ptr);
// Not taken
u32 notTakenTarget = js.compilerPC + (delaySlotIsBranch ? 4 : 8);
WriteExit(notTakenTarget, js.nextExit++);
js.compiling = false;
}
void Jit::Comp_VBranch(MIPSOpcode op)
{
switch ((op >> 16) & 3)
{
case 0: BranchVFPUFlag(op, CC_NEQ, false); break; // bvf
case 1: BranchVFPUFlag(op, CC_EQ, false); break; // bvt
case 2: BranchVFPUFlag(op, CC_NEQ, true); break; // bvfl
case 3: BranchVFPUFlag(op, CC_EQ, true); break; // bvtl
}
}
void Jit::Comp_Jump(MIPSOpcode op)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
u32 off = _IMM26 << 2;
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
switch (op >> 26)
{
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
case 3: //jal
gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, js.nextExit++);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_JumpReg(MIPSOpcode op)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
MIPSGPReg rs = _RS;
MIPSOpcode delaySlotOp = Memory::Read_Instruction(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (IsSyscall(delaySlotOp)) {
gpr.MapReg(rs);
MOV(R8, gpr.R(rs));
MovToPC(R8); // For syscall to be able to return.
CompileDelaySlot(DELAYSLOT_FLUSH);
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
MOV(R8, gpr.R(rs)); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
FlushAll();
} else {
// Delay slot
gpr.MapReg(rs);
MOV(R8, gpr.R(rs)); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
}
switch (op & 0x3f)
{
case 8: //jr
break;
case 9: //jalr
MOVI2R(R0, js.compilerPC + 8);
STR(R0, CTXREG, MIPS_REG_RA * 4);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
WriteExitDestInR(R8);
js.compiling = false;
}
void Jit::Comp_Syscall(MIPSOpcode op)
{
FlushAll();
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDownCount(offset);
js.downcountAmount = -offset;
SaveDowncount();
// Skip the CallSyscall overhead for __KernelIdle, which is called a lot.
if (op == GetSyscallOp("FakeSysCalls", NID_IDLE))
QuickCallFunction(R1, (void *)GetFunc("FakeSysCalls", NID_IDLE)->func);
else
{
MOVI2R(R0, op.encoding);
QuickCallFunction(R1, (void *)&CallSyscall);
}
RestoreDowncount();
WriteSyscallExit();
js.compiling = false;
}
void Jit::Comp_Break(MIPSOpcode op)
{
Comp_Generic(op);
WriteSyscallExit();
js.compiling = false;
}
} // namespace Mipscomp