broken ppc jit

Ced2911 2013-08-10 16:38:09 +02:00
parent cc0a8d1321
commit 9431c811a7
10 changed files with 1985 additions and 130 deletions

Core/MIPS/PPC/PpcAsm.cpp

@@ -0,0 +1,346 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
static void JitAt()
{
MIPSComp::jit->Compile(currentMIPS->pc);
}
namespace MIPSComp
{
//Jit * jit=NULL;
static int dontLogBlocks = 20;
static int logBlocks = 40;
const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
NOTICE_LOG(CPU, "DoJit %08x - %08x\n", mips_->pc, mips_->downcount);
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;
js.inDelaySlot = false;
js.PrefixStart();
// We add a check before the block, used when entering from a linked block.
b->checkedEntry = GetCodePtr();
// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
//SetCC(CC_LT);
MOVI2R(SREG, js.blockStart);
//Break();
// Cmp ??
//CMPLI(DCNTREG, 0);
//BLT((const void *)outerLoopPCInR0);
// if (currentMIPS->downcount<0)
BGT((const void *)outerLoopPCInR0);
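// Intent (per the comments above): bail to outerLoopPCInR0 when the downcount has run out;
// SREG holds the block start address, which outerLoopPCInR0 writes back to mips->pc.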
b->normalEntry = GetCodePtr();
// TODO: this needs work
MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);
gpr.Start(analysis);
//fpr.Start(analysis);
int numInstructions = 0;
int cycles = 0;
int partialFlushOffset = 0;
if (logBlocks > 0) logBlocks--;
if (dontLogBlocks > 0) dontLogBlocks--;
// #define LOGASM
#ifdef LOGASM
char temp[256];
#endif
while (js.compiling)
{
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
//fpr.SetCompilerPC(js.compilerPC);
u32 inst = Memory::Read_Instruction(js.compilerPC);
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
js.compilerPC += 4;
numInstructions++;
/*
if (!cpu_info.bArmV7 && (GetCodePtr() - b->checkedEntry - partialFlushOffset) > 4020)
{
// We need to prematurely flush as we are out of range
FixupBranch skip = B_CC(CC_AL);
FlushLitPool();
SetJumpTarget(skip);
partialFlushOffset = GetCodePtr() - b->checkedEntry;
}
*/
}
//FlushLitPool();
#ifdef LOGASM
if (logBlocks > 0 && dontLogBlocks == 0) {
for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true);
INFO_LOG(DYNA_REC, "M: %08x %s", cpc, temp);
}
}
#endif
b->codeSize = GetCodePtr() - b->normalEntry;
#ifdef LOGASM
if (logBlocks > 0 && dontLogBlocks == 0) {
INFO_LOG(DYNA_REC, "=============== ARM ===============");
DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
}
#endif
//printf("DoJitend %08x - %08x - %08x\n", mips_->pc, mips_->downcount, js.compilerPC);
DumpJit();
AlignCode16();
// Don't forget to zap the instruction cache!
FlushIcache();
b->originalSize = numInstructions;
return b->normalEntry;
}
void Jit::DumpJit() {
u32 len = (u32)GetCodePtr() - (u32)GetBasePtr();
FILE * fd;
fd = fopen("game:\\jit.bin", "wb");
fwrite(GetBasePtr(), len, 1, fd);
fclose(fd);
}
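// Rough layout of the fixed code emitted below:
//   enterCode:                prologue, map BASEREG/CTXREG/CODEREG, load the downcount, SREG = mips->pc
//   outerLoopPCInR0:          mips->pc = SREG, fall through
//   outerLoop:                CoreTiming::Advance(), then dispatch
//   dispatcherCheckCoreState: leave the loop if coreState != CORE_RUNNING
//   dispatcherNoCheck:        fetch the op at mips->pc; jump into the compiled block if it is an
//                             EMUHACK op, otherwise JitAt() compiles it and we dispatch again
//   badCoreState:             epilogue, restore registers and return to the caller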
void Jit::GenerateFixedCode() {
enterCode = AlignCode16();
INFO_LOG(HLE, "Base: %08x", (u32)Memory::base);
INFO_LOG(HLE, "enterCode: 0x%08p", enterCode);
INFO_LOG(HLE, "GetBasePtr: 0x%08p", GetBasePtr());
#if 1
// Write Prologue (setup stack frame etc ...)
// Save Lr
MFLR(R12);
// Save regs
u32 regSize = 8; // 4 in 32bit system
u32 stackFrameSize = 32*32;//(35 - 12) * regSize;
for(int i = 14; i < 32; i ++) {
STD((PPCReg)i, R1, -((33 - i) * regSize));
}
// Save r12
STW(R12, R1, -0x8);
// allocate stack
STWU(R1, R1, -stackFrameSize);
#endif
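// Frame layout as emitted above, relative to the caller's R1:
//   -0x08:           saved R12 (holding the incoming LR)
//   -(33 - i) * 8:   saved r14..r31, i.e. -0x98 through -0x10
// STWU then moves R1 down by stackFrameSize (32 * 32 = 1024) bytes.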
// Map fixed register
MOVI2R(BASEREG, (u32)Memory::base);
MOVI2R(CTXREG, (u32)mips_);
MOVI2R(CODEREG, (u32)GetBasePtr());
// Update downcount reg value from memory
RestoreDowncount(DCNTREG);
// SREG = mips->pc
MovFromPC(SREG);
// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInR3 ??
outerLoopPCInR0 = GetCodePtr();
// mips->pc = SREG
MovToPC(SREG);
// Keep current location
outerLoop = GetCodePtr();
// Jit loop
// {
// Save downcount reg value to memory
SaveDowncount(DCNTREG);
// Call CoreTiming::Advance() => update downcount
QuickCallFunction((void *)&CoreTiming::Advance);
// Update downcount reg value from memory
RestoreDowncount(DCNTREG);
// branch to skipToRealDispatch
FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
// Keep current location dispatcherCheckCoreState:
dispatcherCheckCoreState = GetCodePtr();
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
// branch to bailCoreState: (jump if(what ??) negative )
//FixupBranch bailCoreState = B_CC(CC_MI); // BLT ???
FixupBranch bailCoreState = BLT(); // BLT ???
// SREG = coreState
MOVI2R(SREG, (u32)&coreState);
// ??? Compare coreState and CORE_RUNNING
LWZ(SREG, SREG); // SREG = *SREG
CMPLI(SREG, 0); // compare coreState with 0 (CORE_RUNNING); result goes to CR0
// branch to badCoreState: (jump if coreState != CORE_RUNNING)
FixupBranch badCoreState = BNE(); // B_CC(CC_NEQ)
// branch to skipToRealDispatch2:
FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time
// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInSREG ??
dispatcherPCInR0 = GetCodePtr();
// mips->pc = SREG
MovToPC(SREG);
// At this point : flags = EQ. Fine for the next check, no need to jump over it.
// label dispatcher:
dispatcher = GetCodePtr();
// {
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
// label bail:
// arm B_CC(CC_MI);
FixupBranch bail = BLT();
// label skipToRealDispatch:
SetJumpTarget(skipToRealDispatch);
// label skipToRealDispatch2:
SetJumpTarget(skipToRealDispatch2);
// Keep current location
dispatcherNoCheck = GetCodePtr();
// read op
// R3 = mips->pc & Memory::MEMVIEW32_MASK
LWZ(R3, CTXREG, offsetof(MIPSState, pc));
MOVI2R(SREG, Memory::MEMVIEW32_MASK);
AND(R3, R3, SREG);
// R3 = memory::base[r3];
ADD(R3, BASEREG, R3);
MOVI2R(R0, 0);
LWBRX(R3, R3, R0); // R3 = op now !
// R4 = R3 & MIPS_EMUHACK_VALUE_MASK
MOVI2R(SREG, MIPS_EMUHACK_VALUE_MASK);
AND(R4, R3, SREG);
// R3 = R3 & MIPS_EMUHACK_MASK
ANDIS(R3, R3, (MIPS_EMUHACK_MASK>>16));
// compare, op == MIPS_EMUHACK_OPCODE
MOVI2R(SREG, MIPS_EMUHACK_OPCODE);
CMP(R3, SREG);
// Branch if func block not found
FixupBranch notfound = BNE();
// {
// R3 = R4 + GetBasePtr()
ADD(R3, R4, CODEREG);
MTCTR(R3);
BCTR();
// }
// label notfound:
SetJumpTarget(notfound);
//Ok, no block, let's jit
// Save downcount reg value to memory
SaveDowncount(DCNTREG);
// Exec JitAt => Compile block !
QuickCallFunction((void *)&JitAt);
// Update downcount reg value from memory
RestoreDowncount(DCNTREG);
// branch to dispatcherNoCheck:
B(dispatcherNoCheck); // no point in special casing this
// }
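// Roughly equivalent C for the dispatcherNoCheck path above (illustrative sketch only;
// load_le32 stands for the byte-reversed load done by LWBRX, since the MIPS code is
// little-endian while the PPC host is big-endian):
//   u32 op = load_le32(Memory::base + (mips->pc & Memory::MEMVIEW32_MASK));
//   if ((op & MIPS_EMUHACK_MASK) == MIPS_EMUHACK_OPCODE)
//       goto GetBasePtr() + (op & MIPS_EMUHACK_VALUE_MASK);   // jump straight into the compiled block
//   JitAt();                  // otherwise compile the block at mips->pc ...
//   goto dispatcherNoCheck;   // ... and dispatch again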
// label bail:
SetJumpTarget(bail);
// label bailCoreState:
SetJumpTarget(bailCoreState);
// Compare coreState and CORE_RUNNING
MOVI2R(SREG, (u32)&coreState);
LWZ(SREG, SREG); // SREG = *SREG => SREG = coreState
CMPLI(SREG, 0); // compare coreState with 0 (CORE_RUNNING)
// branch to outerLoop if (coreState == CORE_RUNNING)
// arm: B_CC(CC_EQ, outerLoop);
//Break();
BEQ(outerLoop);
// }
// badCoreState label:
SetJumpTarget(badCoreState);
// Keep current location
breakpointBailout = GetCodePtr();
// mips->downcount = DCNTREG
SaveDowncount(DCNTREG);
#if 1
// Write Epilogue (restore stack frame, return)
// free stack
ADDI(R1, R1, stackFrameSize);
// Restore regs
for(int i = 14; i < 32; i ++) {
LD((PPCReg)i, R1, -((33 - i) * regSize));
}
// recover r12 (LR saved register)
LWZ (R12, R1, -0x8);
// Restore Lr
MTLR(R12);
//BLR();
#endif
// Don't forget to zap the instruction cache!
FlushIcache();
}
}


@@ -0,0 +1,54 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
void Jit::Comp_IType(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_RType2(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_RType3(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_ShiftType(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Allegrex(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Allegrex2(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_MulDivType(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Special3(u32 op) {
Comp_Generic(op);
}
}


@@ -0,0 +1,452 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "Core/Reporting.h"
#include "Core/HLE/HLE.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
#define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT ;
#define SHOW_JS_COMPILER_PC { printf("js.compilerPC: %08x\n", js.compilerPC); }
#define BRANCH_COMPILE_LOG { printf("JIT(%8x): %s => %d - %08x\n", (u32)GetCodePtr() ,__FUNCTION__, cc, js.compilerPC); }
using namespace MIPSAnalyst;
using namespace PpcGen;
namespace MIPSComp
{
void Jit::BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int rt = _RT;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
gpr.MapReg(rs);
CMPLI(gpr.R(rs), 0);
}
else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0 && (cc == _BEQ || cc == _BNE)) // only these are easily 'flippable'
{
gpr.MapReg(rt);
CMPLI(gpr.R(rt),0);
}
else
{
gpr.MapInIn(rs, rt);
CMP(gpr.R(rs), gpr.R(rt));
}
//if (js.compilerPC == 0x089001c4) {
// Break();
// Break();
//}
PpcGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_Cond(cc);
}
else
{
FlushAll();
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
INFO_LOG(CPU, "targetAddr: %08x,js.compilerPC: %08x offset: %08x, op: %08x\n", targetAddr, js.compilerPC, offset, op);
// Take the branch
WriteExit(targetAddr, 0);
// !cond
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC+8, 1);
js.compiling = false;
}
void Jit::BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMPLI(gpr.R(rs), 0);
PpcGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_Cond(cc);
}
else
{
FlushAll();
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
//Break();
MOVI2R(SREG, js.compilerPC + 8);
STW(SREG, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
}
void Jit::Comp_RelBranch(u32 op) {
// The CC flags here should be opposite of the actual branch because they skip the branching action.
switch (op>>26)
{
case 4: BranchRSRTComp(op, _BNE, false); break;//beq
case 5: BranchRSRTComp(op, _BEQ, false); break;//bne
case 6: BranchRSZeroComp(op, _BGT, false, false); break;//blez
case 7: BranchRSZeroComp(op, _BLE, false, false); break;//bgtz
case 20: BranchRSRTComp(op, _BNE, true); break;//beql
case 21: BranchRSRTComp(op, _BEQ, true); break;//bnel
case 22: BranchRSZeroComp(op, _BGT, false, true); break;//blezl
case 23: BranchRSZeroComp(op, _BLE, false, true); break;//bgtzl
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_RelBranchRI(u32 op) {
switch ((op >> 16) & 0x1F)
{
case 0: BranchRSZeroComp(op, _BGE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
case 1: BranchRSZeroComp(op, _BLT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
case 2: BranchRSZeroComp(op, _BGE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
case 3: BranchRSZeroComp(op, _BLT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
case 16: BranchRSZeroComp(op, _BGE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal
case 17: BranchRSZeroComp(op, _BLT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
case 18: BranchRSZeroComp(op, _BGE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
case 19: BranchRSZeroComp(op, _BLT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
void Jit::BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
DebugBreak(); // not implemented yet!
/*
LWZ(SREG, CTXREG, offsetof(MIPSState, fpcond));
//TST(SREG, Operand2(1, TYPE_IMM));
// i don't know the equivalent so ...
CMP(
*/
PpcGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = B_Cond(cc);
}
else
{
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
}
void Jit::Comp_FPUBranch(u32 op) {
switch((op >> 16) & 0x1f)
{
case 0: BranchFPFlag(op, _BNE, false); break; // bc1f
case 1: BranchFPFlag(op, _BEQ, false); break; // bc1t
case 2: BranchFPFlag(op, _BNE, true); break; // bc1fl
case 3: BranchFPFlag(op, _BEQ, true); break; // bc1tl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
void Jit::BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
int imm3 = (op >> 18) & 7;
/*
MOVI2R(R0, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
LWZ(R0, R0, Operand2(0, TYPE_IMM));
TST(R0, Operand2(1 << imm3, TYPE_IMM));
*/
DebugBreak(); // not implemented yet!
PpcGen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = B_Cond(cc);
}
else
{
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
js.inDelaySlot = false;
// Take the branch
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
}
void Jit::Comp_VBranch(u32 op) {
switch ((op >> 16) & 3)
{
case 0: BranchVFPUFlag(op, _BNE, false); break; // bvf
case 1: BranchVFPUFlag(op, _BEQ, false); break; // bvt
case 2: BranchVFPUFlag(op, _BNE, true); break; // bvfl
case 3: BranchVFPUFlag(op, _BEQ, true); break; // bvtl
}
js.compiling = false;
}
void Jit::Comp_Jump(u32 op) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
u32 off = ((op & 0x03FFFFFF) << 2);
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
/*if (op == 0x0a240070) {
Break();
}*/
switch (op >> 26)
{
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, 0);
break;
case 3: //jal
//Break();
gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, 0);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_JumpReg(u32 op) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int rs = _RS;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (IsSyscall(delaySlotOp)) {
gpr.MapReg(rs);
PPCReg mRs = gpr.R(rs);
MR(FLAGREG, mRs);
MovToPC(FLAGREG); // For syscall to be able to return.
CompileDelaySlot(DELAYSLOT_FLUSH);
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
PPCReg mRs = gpr.R(rs);
MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use delaySlotIsNice to avoid this once the jit is fully implemented
FlushAll();
} else {
// Delay slot
gpr.MapReg(rs);
PPCReg mRs = gpr.R(rs);
MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use delaySlotIsNice to avoid this once the jit is fully implemented
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
}
switch (op & 0x3f)
{
case 8: //jr
break;
case 9: //jalr
// mips->reg = js.compilerPC + 8;
//Break();
MOVI2R(SREG, js.compilerPC + 8);
STW(SREG, CTXREG, MIPS_REG_RA * 4);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
WriteExitDestInR(FLAGREG);
js.compiling = false;
}
void Jit::Comp_Syscall(u32 op) {
FlushAll();
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDownCount(offset);
js.downcountAmount = -offset;
// CallSyscall(op);
MOVI2R(R3, op);
SaveDowncount(DCNTREG);
QuickCallFunction((void *)&CallSyscall);
RestoreDowncount(DCNTREG);
WriteSyscallExit();
js.compiling = false;
}
void Jit::Comp_Break(u32 op) {
Comp_Generic(op);
WriteSyscallExit();
js.compiling = false;
}
}


@@ -0,0 +1,42 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
void Jit::Comp_FPULS(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_FPUComp(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_FPU3op(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_FPU2op(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_mxc1(u32 op) {
Comp_Generic(op);
}
}


@@ -0,0 +1,24 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
namespace MIPSComp
{
void Jit::Comp_ITypeMem(u32 op) {
Comp_Generic(op);
}
}


@@ -0,0 +1,139 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
namespace MIPSComp
{
void Jit::Comp_SV(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_SVQ(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VPFX(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VVectorInit(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VMatrixInit(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VDot(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VecDo3(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VV2Op(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Mftv(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmtvc(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmmov(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VScl(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmmul(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmscl(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vtfm(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VHdp(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VCrs(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VDet(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vi2x(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vx2i(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vf2i(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vi2f(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vcst(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vhoriz(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VRot(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VIdt(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vcmp(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vcmov(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Viim(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vfim(u32 op) {
Comp_Generic(op);
}
}


@@ -1,76 +1,229 @@
#include "Common/ChunkFile.h"
#include "../../Core.h"
#include "../../CoreTiming.h"
#include "../MIPS.h"
#include "../MIPSCodeUtils.h"
#include "../MIPSInt.h"
#include "../MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
Jit * jit=NULL;
static u32 delaySlotFlagsValue;
/** We use a non-volatile register (FLAGREG, r18) for the flags, so there is no need to save it across calls. **/
void Jit::CompileDelaySlot(int flags)
{
// Preserve the flag around the delay slot! Maybe this is not always necessary on ARM, where
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt into the
// delay slot, we're screwed.
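// Example of the problem case (MIPS):
//   beq  a0, a1, target
//   slt  v0, t0, t1      <- delay slot
// The branch condition is evaluated before the slot is emitted, so the slot code must not
// clobber it; that is what the FLAGREG save/restore below (DELAYSLOT_SAFE) is for.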
if (flags & DELAYSLOT_SAFE) {
// Save flags register
//Break();
MOVI2R(SREG, (u32)&delaySlotFlagsValue);
STW(FLAGREG, SREG);
}
js.inDelaySlot = true;
u32 op = Memory::Read_Instruction(js.compilerPC + 4);
MIPSCompileOp(op);
js.inDelaySlot = false;
if (flags & DELAYSLOT_FLUSH)
FlushAll();
if (flags & DELAYSLOT_SAFE) {
// Restore flags register
//Break();
MOVI2R(SREG, (u32)&delaySlotFlagsValue);
LWZ(FLAGREG, SREG);
}
}
void Jit::Compile(u32 em_address)
{
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
{
ClearCache();
}
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
// Drat. The VFPU hit an uneaten prefix at the end of a block.
if (js.startDefaultPrefix && js.MayHavePrefix())
{
js.startDefaultPrefix = false;
// Our assumptions are all wrong so it's clean-slate time.
ClearCache();
// Let's try that one more time. We won't get back here because we toggled the value.
Compile(em_address);
}
}
void Jit::MovFromPC(PPCReg r) {
LWZ(r, CTXREG, offsetof(MIPSState, pc));
}
void Jit::MovToPC(PPCReg r) {
STW(r, CTXREG, offsetof(MIPSState, pc));
}
void Jit::SaveDowncount(PPCReg r) {
STW(r, CTXREG, offsetof(MIPSState, downcount));
}
void Jit::RestoreDowncount(PPCReg r) {
LWZ(r, CTXREG, offsetof(MIPSState, downcount));
}
void Jit::WriteDownCount(int offset)
{
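// Both paths below compute downcount -= (js.downcountAmount + offset) and store the result
// back to mips->downcount; they differ only in whether the old value starts out in DCNTREG or in memory.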
// don't know if the result is correct
int theDowncount = js.downcountAmount + offset;
if (jo.downcountInRegister) {
// DCNTREG = DCNTREG - theDowncount;
MOVI2R(SREG, theDowncount);
SUBF(DCNTREG, SREG, DCNTREG);
STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
} else {
// DCNTREG = MIPSState->downcount - theDowncount;
MOVI2R(SREG, theDowncount);
LWZ(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
SUBF(DCNTREG, SREG, DCNTREG);
STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
}
}
void Jit::Comp_Generic(u32 op) {
// basic jit !!
MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
if (func)
{
// Save mips PC and cycles
SaveDowncount(DCNTREG);
//// Set func param
//if (op == 0x00009021)
// Break();
MOVI2R(R3, op);
QuickCallFunction((void *)func);
// restore pc and cycles
RestoreDowncount(DCNTREG);
}
// Might have eaten prefixes, hard to tell...
if ((MIPSGetInfo(op) & IS_VFPU) != 0)
js.PrefixStart();
}
void Jit::EatInstruction(u32 op) {
u32 info = MIPSGetInfo(op);
_dbg_assert_msg_(JIT, !(info & DELAYSLOT), "Never eat a branch op.");
_dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot.");
js.compilerPC += 4;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Jit::Comp_IType(u32 op){}
void Jit::Comp_RType2(u32 op){}
void Jit::Comp_RType3(u32 op){}
void Jit::Comp_ShiftType(u32 op){}
void Jit::Comp_Allegrex(u32 op){}
void Jit::Comp_Allegrex2(u32 op){}
void Jit::Comp_VBranch(u32 op){}
void Jit::Comp_MulDivType(u32 op){}
void Jit::Comp_Special3(u32 op){}
void Jit::Comp_RunBlock(u32 op) {
// This shouldn't be necessary, the dispatcher should catch us before we get here.
ERROR_LOG(DYNA_REC, "Comp_RunBlock should never be reached!");
}
void Jit::Comp_FPU3op(u32 op){}
void Jit::Comp_FPU2op(u32 op){}
void Jit::Comp_mxc1(u32 op){}
void Jit::Comp_DoNothing(u32 op) {
}
void Jit::Comp_SV(u32 op){}
void Jit::Comp_SVQ(u32 op){}
void Jit::Comp_VPFX(u32 op){}
void Jit::Comp_VVectorInit(u32 op){}
void Jit::Comp_VMatrixInit(u32 op){}
void Jit::Comp_VDot(u32 op){}
void Jit::Comp_VecDo3(u32 op){}
void Jit::Comp_VV2Op(u32 op){}
void Jit::Comp_Mftv(u32 op){}
void Jit::Comp_Vmtvc(u32 op){}
void Jit::Comp_Vmmov(u32 op){}
void Jit::Comp_VScl(u32 op){}
void Jit::Comp_Vmmul(u32 op){}
void Jit::Comp_Vmscl(u32 op){}
void Jit::Comp_Vtfm(u32 op){}
void Jit::Comp_VHdp(u32 op){}
void Jit::Comp_VCrs(u32 op){}
void Jit::Comp_VDet(u32 op){}
void Jit::Comp_Vi2x(u32 op){}
void Jit::Comp_Vx2i(u32 op){}
void Jit::Comp_Vf2i(u32 op){}
void Jit::Comp_Vi2f(u32 op){}
void Jit::Comp_Vcst(u32 op){}
void Jit::Comp_Vhoriz(u32 op){}
void Jit::Comp_VRot(u32 op){}
void Jit::Comp_VIdt(u32 op){}
void Jit::FlushAll()
{
gpr.FlushAll();
//fpr.FlushAll();
//FlushPrefixV();
}
void Jit::ClearCache() {
blocks.Clear();
ClearCodeSpace();
GenerateFixedCode();
}
void Jit::ClearCacheAt(u32 em_address) {
ClearCache();
}
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo),mips_(mips)
{
blocks.Init();
gpr.SetEmitter(this);
AllocCodeSpace(1024 * 1024 * 16); // 16MB of code space. 32MB is the absolute max, because that's what a single branch instruction can reach, backwards and forwards.
GenerateFixedCode();
js.startDefaultPrefix = true;
}
void Jit::RunLoopUntil(u64 globalticks) {
// Run the compiled code
INFO_LOG(HLE, "enterCode: %08p", enterCode);
_alloca(8*1024);
((void (*)())enterCode)();
}
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Jit::WriteExit(u32 destination, int exit_num)
{
WriteDownCount();
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
b->exitAddress[exit_num] = destination;
b->exitPtrs[exit_num] = GetWritableCodePtr();
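// Record where this exit's branch lives; presumably so the block cache can later patch it
// into a direct link once the destination block has been compiled.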
// Link opportunity!
int block = blocks.GetBlockNumberFromStartAddress(destination);
if (block >= 0 && jo.enableBlocklink) {
// It exists! Joy of joy!
B(blocks.GetBlock(block)->checkedEntry);
b->linkStatus[exit_num] = true;
} else {
MOVI2R(SREG, destination);
B((const void *)dispatcherPCInR0);
}
}
void Jit::WriteExitDestInR(PPCReg Reg)
{
//Break();
MovToPC(Reg);
WriteDownCount();
// TODO: shouldn't need an indirect branch here...
B((const void *)dispatcher);
}
void Jit::WriteSyscallExit()
{
WriteDownCount();
B((const void *)dispatcherCheckCoreState);
}
}


@@ -19,88 +19,260 @@
#include "../../../Globals.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Core/MIPS/PPC/PpcRegCache.h"
#include "Core/MIPS/MIPS.h"
#include <ppcEmitter.h>
namespace MIPSComp
{
struct PpcJitOptions
{
PpcJitOptions()
{
enableBlocklink = true;
downcountInRegister = true;
}
bool enableBlocklink;
bool downcountInRegister;
};
struct PpcJitState
{
enum PrefixState
{
PREFIX_UNKNOWN = 0x00,
PREFIX_KNOWN = 0x01,
PREFIX_DIRTY = 0x10,
PREFIX_KNOWN_DIRTY = 0x11,
};
u32 compilerPC;
u32 blockStart;
bool cancel;
bool inDelaySlot;
int downcountAmount;
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
JitBlock *curBlock;
// VFPU prefix magic
bool startDefaultPrefix;
u32 prefixS;
u32 prefixT;
u32 prefixD;
PrefixState prefixSFlag;
PrefixState prefixTFlag;
PrefixState prefixDFlag;
void PrefixStart() {
if (startDefaultPrefix) {
EatPrefix();
} else {
PrefixUnknown();
}
}
void PrefixUnknown() {
prefixSFlag = PREFIX_UNKNOWN;
prefixTFlag = PREFIX_UNKNOWN;
prefixDFlag = PREFIX_UNKNOWN;
}
bool MayHavePrefix() const {
if (HasUnknownPrefix()) {
return true;
} else if (prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0) {
return true;
} else if (VfpuWriteMask() != 0) {
return true;
}
return false;
}
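// 0xE4 == 0b11100100 is the identity swizzle (x, y, z, w), i.e. the default source prefix;
// prefixD == 0 means no write mask and no saturation.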
bool HasUnknownPrefix() const {
if (!(prefixSFlag & PREFIX_KNOWN) || !(prefixTFlag & PREFIX_KNOWN) || !(prefixDFlag & PREFIX_KNOWN)) {
return true;
}
return false;
}
bool HasNoPrefix() const {
return (prefixDFlag & PREFIX_KNOWN) && (prefixSFlag & PREFIX_KNOWN) && (prefixTFlag & PREFIX_KNOWN) && (prefixS == 0xE4 && prefixT == 0xE4 && prefixD == 0);
}
void EatPrefix() {
if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) {
prefixSFlag = PREFIX_KNOWN_DIRTY;
prefixS = 0xE4;
}
if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) {
prefixTFlag = PREFIX_KNOWN_DIRTY;
prefixT = 0xE4;
}
if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || VfpuWriteMask() != 0) {
prefixDFlag = PREFIX_KNOWN_DIRTY;
prefixD = 0x0;
}
}
u8 VfpuWriteMask() const {
_assert_(prefixDFlag & PREFIX_KNOWN);
return (prefixD >> 8) & 0xF;
}
bool VfpuWriteMask(int i) const {
_assert_(prefixDFlag & PREFIX_KNOWN);
return (prefixD >> (8 + i)) & 1;
}
};
enum CompileDelaySlotFlags
{
// Easy, nothing extra.
DELAYSLOT_NICE = 0,
// Flush registers after delay slot.
DELAYSLOT_FLUSH = 1,
// Preserve flags.
DELAYSLOT_SAFE = 2,
// Flush registers after and preserve flags.
DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE,
};
class Jit: public PpcGen::PPCXCodeBlock
{
protected:
JitBlockCache blocks;
public:
Jit(MIPSState *mips);
// Compiled ops should ignore delay slots
// the compiler will take care of them by itself
// OR NOT
void Comp_Generic(u32 op);
void EatInstruction(u32 op);
void Comp_RunBlock(u32 op);
// TODO: Eat VFPU prefixes here.
void EatPrefix() { }
// Ops
void Comp_ITypeMem(u32 op);
void Comp_RelBranch(u32 op);
void Comp_RelBranchRI(u32 op);
void Comp_FPUBranch(u32 op);
void Comp_FPULS(u32 op);
void Comp_FPUComp(u32 op);
void Comp_Jump(u32 op);
void Comp_JumpReg(u32 op);
void Comp_Syscall(u32 op);
void Comp_Break(u32 op);
void Comp_IType(u32 op);
void Comp_RType2(u32 op);
void Comp_RType3(u32 op);
void Comp_ShiftType(u32 op);
void Comp_Allegrex(u32 op);
void Comp_Allegrex2(u32 op);
void Comp_VBranch(u32 op);
void Comp_MulDivType(u32 op);
void Comp_Special3(u32 op);
void Comp_FPU3op(u32 op);
void Comp_FPU2op(u32 op);
void Comp_mxc1(u32 op);
void Comp_DoNothing(u32 op);
void Comp_SV(u32 op);
void Comp_SVQ(u32 op);
void Comp_VPFX(u32 op);
void Comp_VVectorInit(u32 op);
void Comp_VMatrixInit(u32 op);
void Comp_VDot(u32 op);
void Comp_VecDo3(u32 op);
void Comp_VV2Op(u32 op);
void Comp_Mftv(u32 op);
void Comp_Vmtvc(u32 op);
void Comp_Vmmov(u32 op);
void Comp_VScl(u32 op);
void Comp_Vmmul(u32 op);
void Comp_Vmscl(u32 op);
void Comp_Vtfm(u32 op);
void Comp_VHdp(u32 op);
void Comp_VCrs(u32 op);
void Comp_VDet(u32 op);
void Comp_Vi2x(u32 op);
void Comp_Vx2i(u32 op);
void Comp_Vf2i(u32 op);
void Comp_Vi2f(u32 op);
void Comp_Vcst(u32 op);
void Comp_Vhoriz(u32 op);
void Comp_VRot(u32 op);
void Comp_VIdt(u32 op);
void Comp_Vcmp(u32 op);
void Comp_Vcmov(u32 op);
void Comp_Viim(u32 op);
void Comp_Vfim(u32 op);
// Utility compilation functions
void BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
void BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
void BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely);
void BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely);
// flush regs
void FlushAll();
void WriteDownCount(int offset = 0);
void MovFromPC(PpcGen::PPCReg r);
void MovToPC(PpcGen::PPCReg r);
void SaveDowncount(PpcGen::PPCReg r);
void RestoreDowncount(PpcGen::PPCReg r);
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInR(PPCReg Reg);
void WriteSyscallExit();
void ClearCache();
void ClearCacheAt(u32 em_address);
void RunLoopUntil(u64 globalticks);
void GenerateFixedCode();
void DumpJit();
void CompileDelaySlot(int flags);
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
PpcJitOptions jo;
PpcJitState js;
PpcRegCache gpr;
//PpcRegCacheFPU fpr;
MIPSState *mips_;
JitBlockCache *GetBlockCache() { return &blocks; }
public:
// Code pointers
const u8 *enterCode;
const u8 *outerLoop;
const u8 *outerLoopPCInR0;
const u8 *dispatcherCheckCoreState;
const u8 *dispatcherPCInR0;
const u8 *dispatcher;
const u8 *dispatcherNoCheck;
const u8 *breakpointBailout;
};
typedef void (Jit::*MIPSCompileFunc)(u32 opcode);
} // namespace MIPSComp


@@ -0,0 +1,317 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <PpcEmitter.h>
#include "PpcRegCache.h"
#include "PpcJit.h"
#if defined(MAEMO)
#include "stddef.h"
#endif
using namespace PpcGen;
PpcRegCache::PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options) : mips_(mips), options_(options) {
}
void PpcRegCache::Init(PPCXEmitter *emitter) {
emit_ = emitter;
}
void PpcRegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
for (int i = 0; i < NUM_PPCREG; i++) {
ar[i].mipsReg = -1;
ar[i].isDirty = false;
}
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].loc = ML_MEM;
mr[i].reg = INVALID_REG;
mr[i].imm = -1;
mr[i].spillLock = false;
}
}
const PPCReg *PpcRegCache::GetMIPSAllocationOrder(int &count) {
// Note that SREG (R5) is used as scratch.
// R14-R17 hold the MIPS context, downcount, code pointer and base pointer (see PpcRegCache.h),
// and FLAGREG (R18) is used to preserve flags in nasty branches,
// so R14-R18 are left out of the allocation order below.
// R19 and upwards are available for allocating MIPS registers.
if (options_->downcountInRegister) {
static const PPCReg allocationOrder[] = {
/*R14, R15, R16, R17, R18, */R19,
R20, R21, R22, R23, R24, R25,
R26, R27, R28, R29, R30, R31,
};
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;
} else {
static const PPCReg allocationOrder2[] = {
/*R14, R15, R16, R17, R18,*/ R19,
R20, R21, R22, R23, R24, R25,
R26, R27, R28, R29, R30, R31,
};
count = sizeof(allocationOrder2) / sizeof(const int);
return allocationOrder2;
}
}
void PpcRegCache::FlushBeforeCall() {
// On PPC, R14-R31 are non-volatile and that's all we allocate from, so nothing needs flushing here.
/*
FlushPpcReg(R2);
FlushPpcReg(R3);
FlushPpcReg(R12);
*/
}
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.
PPCReg PpcRegCache::MapReg(MIPSReg mipsReg, int mapFlags) {
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
if (mr[mipsReg].loc == ML_PPCREG) {
if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
ERROR_LOG(HLE, "Register mapping out of sync! %i", mipsReg);
}
if (mapFlags & MAP_DIRTY) {
ar[mr[mipsReg].reg].isDirty = true;
}
return (PPCReg)mr[mipsReg].reg;
}
// Okay, not mapped, so we need to allocate a PPC register.
int allocCount;
const PPCReg *allocOrder = GetMIPSAllocationOrder(allocCount);
allocate:
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i];
if (ar[reg].mipsReg == -1) {
// That means it's free. Grab it, and load the value into it (if requested).
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if (!(mapFlags & MAP_NOINIT)) {
if (mr[mipsReg].loc == ML_MEM) {
if (mipsReg != 0) {
emit_->LWZ((PPCReg)reg, CTXREG, GetMipsRegOffset(mipsReg));
} else {
// If we get a request to load the zero register, at least we won't spend
// time on a memory access...
emit_->MOVI2R((PPCReg)reg, 0);
}
} else if (mr[mipsReg].loc == ML_IMM) {
emit_->MOVI2R((PPCReg)reg, mr[mipsReg].imm);
ar[reg].isDirty = true; // IMM is always dirty.
}
}
ar[reg].mipsReg = mipsReg;
mr[mipsReg].loc = ML_PPCREG;
mr[mipsReg].reg = (PPCReg)reg;
return (PPCReg)reg;
}
}
// Still nothing. Let's spill a reg and goto 10.
// TODO: Use age or something to choose which register to spill?
// TODO: Spill dirty regs first? or opposite?
int bestToSpill = -1;
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i];
if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock)
continue;
bestToSpill = reg;
break;
}
if (bestToSpill != -1) {
// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
FlushPpcReg((PPCReg)bestToSpill);
goto allocate;
}
// Uh oh, we have all them spilllocked....
ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
return INVALID_REG;
}
void PpcRegCache::MapInIn(MIPSReg rd, MIPSReg rs) {
SpillLock(rd, rs);
MapReg(rd);
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) {
SpillLock(rd, rs);
bool load = !avoidLoad || rd == rs;
MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT));
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
SpillLock(rd, rs, rt);
bool load = !avoidLoad || (rd == rs || rd == rt);
MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT));
MapReg(rt);
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
SpillLock(rd1, rd2, rs, rt);
bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt);
bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt);
MapReg(rd1, MAP_DIRTY | (load1 ? 0 : MAP_NOINIT));
MapReg(rd2, MAP_DIRTY | (load2 ? 0 : MAP_NOINIT));
MapReg(rt);
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::FlushPpcReg(PPCReg r) {
if (ar[r].mipsReg == -1) {
// Nothing to do, reg not mapped.
return;
}
if (ar[r].mipsReg != -1) {
if (ar[r].isDirty && mr[ar[r].mipsReg].loc == ML_PPCREG)
emit_->STW(r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
// IMMs won't be in a PPC reg.
mr[ar[r].mipsReg].loc = ML_MEM;
mr[ar[r].mipsReg].reg = INVALID_REG;
mr[ar[r].mipsReg].imm = 0;
} else {
ERROR_LOG(HLE, "Dirty but no mipsreg?");
}
ar[r].isDirty = false;
ar[r].mipsReg = -1;
}
void PpcRegCache::FlushR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
emit_->MOVI2R(SREG, mr[r].imm);
emit_->STW(SREG, CTXREG, GetMipsRegOffset(r));
break;
case ML_PPCREG:
if (mr[r].reg == INVALID_REG) {
ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad PpcReg");
}
if (ar[mr[r].reg].isDirty) {
emit_->STW((PPCReg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
ar[mr[r].reg].isDirty = false;
}
ar[mr[r].reg].mipsReg = -1;
break;
case ML_MEM:
// Already there, nothing to do.
break;
default:
//BAD
break;
}
mr[r].loc = ML_MEM;
mr[r].reg = INVALID_REG;
mr[r].imm = 0;
}
void PpcRegCache::FlushAll() {
for (int i = 0; i < NUM_MIPSREG; i++) {
FlushR(i);
}
// Sanity check
for (int i = 0; i < NUM_PPCREG; i++) {
if (ar[i].mipsReg != -1) {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
}
void PpcRegCache::SetImm(MIPSReg r, u32 immVal) {
if (r == 0)
ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
// Zap existing value if cached in a reg
if (mr[r].loc == ML_PPCREG) {
ar[mr[r].reg].mipsReg = -1;
ar[mr[r].reg].isDirty = false;
}
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
mr[r].reg = INVALID_REG;
}
bool PpcRegCache::IsImm(MIPSReg r) const {
if (r == 0) return true;
return mr[r].loc == ML_IMM;
}
u32 PpcRegCache::GetImm(MIPSReg r) const {
if (r == 0) return 0;
if (mr[r].loc != ML_IMM) {
ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
}
return mr[r].imm;
}
int PpcRegCache::GetMipsRegOffset(MIPSReg r) {
if (r < 32)
return r * 4;
switch (r) {
case MIPSREG_HI:
return offsetof(MIPSState, hi);
case MIPSREG_LO:
return offsetof(MIPSState, lo);
}
ERROR_LOG(JIT, "bad mips register %i", r);
return 0; // or what?
}
void PpcRegCache::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
mr[r1].spillLock = true;
if (r2 != -1) mr[r2].spillLock = true;
if (r3 != -1) mr[r3].spillLock = true;
if (r4 != -1) mr[r4].spillLock = true;
}
void PpcRegCache::ReleaseSpillLocks() {
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].spillLock = false;
}
}
void PpcRegCache::ReleaseSpillLock(MIPSReg reg) {
mr[reg].spillLock = false;
}
PPCReg PpcRegCache::R(int mipsReg) {
if (mr[mipsReg].loc == ML_PPCREG) {
return (PPCReg)mr[mipsReg].reg;
} else {
ERROR_LOG(JIT, "Reg %i not in ppc reg. compilerPC = %08x", mipsReg, compilerPC_);
return INVALID_REG; // BAAAD
}
}

Core/MIPS/PPC/PpcRegCache.h

@@ -0,0 +1,156 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
/**
PPC reg cache based on arm version
**/
#pragma once
#include "../MIPS.h"
#include "../MIPSAnalyst.h"
#include "ppcEmitter.h"
using namespace PpcGen;
// ARM layout, kept for reference:
//   R2 to R8: mapped MIPS regs
//   R9 = code pointer
//   R10 = MIPS context
//   R11 = base pointer
// PPC layout used here:
//   R18 to R31: mapped MIPS regs
//   R14 = MIPS context
//   R15 = downcount register
//   R16 = code pointer
//   R17 = base pointer
#if 1
#define CTXREG (R14)
#define DCNTREG (R15)
#define CODEREG (R16)
#define BASEREG (R17)
#else
#define CTXREG (R6)
#define DCNTREG (R7)
#define CODEREG (R8)
#define BASEREG (R9)
#endif
// Safe to use this as scratch regs ?
#define SREG (R5)
#define FLAGREG (R18)
// Special MIPS registers:
enum {
MIPSREG_HI = 32,
MIPSREG_LO = 33,
TOTAL_MAPPABLE_MIPSREGS = 34,
};
typedef int MIPSReg;
struct RegPPC {
int mipsReg; // if -1, no mipsreg attached.
bool isDirty; // Should the register be written back?
};
enum RegMIPSLoc {
ML_IMM,
ML_PPCREG,
ML_MEM,
};
struct RegMIPS {
// Where is this MIPS register?
RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 imm;
PPCReg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.
// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
#undef MAP_DIRTY
#undef MAP_NOINIT
// Initing is the default so the flag is reversed.
enum {
MAP_DIRTY = 1,
MAP_NOINIT = 2,
};
namespace MIPSComp {
struct PpcJitOptions;
}
class PpcRegCache
{
public:
PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options);
~PpcRegCache() {}
void Init(PPCXEmitter *emitter);
void Start(MIPSAnalyst::AnalysisResults &stats);
// Protect the PPC register containing a MIPS register from spilling, to ensure that
// it's being kept allocated.
void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
void ReleaseSpillLock(MIPSReg reg);
void ReleaseSpillLocks();
void SetImm(MIPSReg reg, u32 immVal);
bool IsImm(MIPSReg reg) const;
u32 GetImm(MIPSReg reg) const;
// Returns a PPC register containing the requested MIPS register.
PPCReg MapReg(MIPSReg reg, int mapFlags = 0);
void MapInIn(MIPSReg rd, MIPSReg rs);
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void FlushPpcReg(PPCReg r);
void FlushR(MIPSReg r);
void FlushBeforeCall();
void FlushAll();
PPCReg R(int preg); // Returns a cached register
void SetEmitter(PPCXEmitter *emitter) { emit_ = emitter; }
// For better log output only.
void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }
int GetMipsRegOffset(MIPSReg r);
private:
const PPCReg *GetMIPSAllocationOrder(int &count);
MIPSState *mips_;
MIPSComp::PpcJitOptions *options_;
PPCXEmitter *emit_;
u32 compilerPC_;
enum {
NUM_PPCREG = 32,
NUM_MIPSREG = TOTAL_MAPPABLE_MIPSREGS,
};
RegPPC ar[NUM_MIPSREG];
RegMIPS mr[NUM_MIPSREG];
};