From 9431c811a7cbce83919987329c2710f3f631aa4d Mon Sep 17 00:00:00 2001
From: Ced2911
Date: Sat, 10 Aug 2013 16:38:09 +0200
Subject: [PATCH] broken ppc jit

---
 Core/MIPS/PPC/PpcAsm.cpp           | 346 ++++++++++++++++++++++
 Core/MIPS/PPC/PpcCompAlu.cpp       | 54 ++++
 Core/MIPS/PPC/PpcCompBranch.cpp    | 452 +++++++++++++++++++++++++++++
 Core/MIPS/PPC/PpcCompFpu.cpp       | 42 +++
 Core/MIPS/PPC/PpcCompLoadStore.cpp | 24 ++
 Core/MIPS/PPC/PpcCompVFPU.cpp      | 139 +++++++++
 Core/MIPS/PPC/PpcJit.cpp           | 271 +++++++++++++----
 Core/MIPS/PPC/PpcJit.h             | 314 +++++++++++++++-----
 Core/MIPS/PPC/PpcRegCache.cpp      | 317 ++++++++++++++++++++
 Core/MIPS/PPC/PpcRegCache.h        | 156 ++++++++++
 10 files changed, 1985 insertions(+), 130 deletions(-)
 create mode 100644 Core/MIPS/PPC/PpcAsm.cpp
 create mode 100644 Core/MIPS/PPC/PpcCompAlu.cpp
 create mode 100644 Core/MIPS/PPC/PpcCompBranch.cpp
 create mode 100644 Core/MIPS/PPC/PpcCompFpu.cpp
 create mode 100644 Core/MIPS/PPC/PpcCompLoadStore.cpp
 create mode 100644 Core/MIPS/PPC/PpcCompVFPU.cpp
 create mode 100644 Core/MIPS/PPC/PpcRegCache.cpp
 create mode 100644 Core/MIPS/PPC/PpcRegCache.h

diff --git a/Core/MIPS/PPC/PpcAsm.cpp b/Core/MIPS/PPC/PpcAsm.cpp
new file mode 100644
index 000000000..19b245f0e
--- /dev/null
+++ b/Core/MIPS/PPC/PpcAsm.cpp
@@ -0,0 +1,346 @@
+#include "Common/ChunkFile.h"
+#include "../../Core.h"
+#include "../../CoreTiming.h"
+#include "../MIPS.h"
+#include "../MIPSCodeUtils.h"
+#include "../MIPSInt.h"
+#include "../MIPSTables.h"
+
+#include "PpcRegCache.h"
+#include "ppcEmitter.h"
+#include "PpcJit.h"
+
+#include // NOTE(review): the header name is missing on this line in the patch as received -- restore it before applying
+
+using namespace PpcGen;
+
+extern volatile CoreState coreState;
+// Called from the generated dispatcher (see GenerateFixedCode) to compile the block at the current MIPS pc.
+static void JitAt()
+{
+	MIPSComp::jit->Compile(currentMIPS->pc);
+}
+
+namespace MIPSComp
+{
+	//Jit * jit=NULL;
+
+static int dontLogBlocks = 20;
+static int logBlocks = 40;
+// Compiles guest code starting at mips_->pc into block b, one op at a time, until a compiled op clears js.compiling; returns b->normalEntry.
+const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
+{
+	NOTICE_LOG(CPU, "DoJit %08x - %08x\n", mips_->pc, mips_->downcount);
+
+
+	js.cancel = false;
+	js.blockStart = js.compilerPC = mips_->pc;
+	js.downcountAmount = 0;
+	js.curBlock = b;
+	js.compiling = true;
+	js.inDelaySlot = false;
+	js.PrefixStart();
+
+	// We add a check before the block, used when entering from a linked block.
+	b->checkedEntry = GetCodePtr();
+	// Downcount flag check. The last block decremented downcounter, and the flag should still be available.
+	//SetCC(CC_LT);
+
+
+	MOVI2R(SREG, js.blockStart);
+
+	//Break();
+
+	// Cmp ??
+	//CMPLI(DCNTREG, 0);
+	//BLT((const void *)outerLoopPCInR0);
+	// if (currentMIPS->downcount<0)
+	BGT((const void *)outerLoopPCInR0); // NOTE(review): the comment above says "<0" but this emits BGT, and the compare before it is commented out -- confirm
+
+	b->normalEntry = GetCodePtr();
+	// TODO: this needs work
+	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);
+
+	gpr.Start(analysis);
+	//fpr.Start(analysis);
+
+	int numInstructions = 0;
+	int cycles = 0;
+	int partialFlushOffset = 0;
+	if (logBlocks > 0) logBlocks--;
+	if (dontLogBlocks > 0) dontLogBlocks--;
+
+// #define LOGASM
+#ifdef LOGASM
+	char temp[256];
+#endif
+	while (js.compiling)
+	{
+		gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
+		//fpr.SetCompilerPC(js.compilerPC);
+		u32 inst = Memory::Read_Instruction(js.compilerPC);
+		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
+
+		MIPSCompileOp(inst);
+
+		js.compilerPC += 4;
+		numInstructions++;
+		/*
+		if (!cpu_info.bArmV7 && (GetCodePtr() - b->checkedEntry - partialFlushOffset) > 4020)
+		{
+			// We need to prematurely flush as we are out of range
+			FixupBranch skip = B_CC(CC_AL);
+			FlushLitPool();
+			SetJumpTarget(skip);
+			partialFlushOffset = GetCodePtr() - b->checkedEntry;
+		}
+		*/
+	}
+	//FlushLitPool();
+#ifdef LOGASM
+	if (logBlocks > 0 && dontLogBlocks == 0) {
+		for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
+			MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true);
+			INFO_LOG(DYNA_REC, "M: %08x %s", cpc, temp);
+		}
+	}
+#endif
+
+	b->codeSize = GetCodePtr() - b->normalEntry;
+
+#ifdef LOGASM
+	if (logBlocks > 0 && dontLogBlocks == 0) {
+		INFO_LOG(DYNA_REC, "=============== ARM ===============");
+		DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
+	}
+#endif
+
+
+	//printf("DoJitend %08x - %08x - %08x\n", mips_->pc, mips_->downcount, js.compilerPC);
+
+	DumpJit(); // debug aid: rewrites the whole dump file after every compiled block
+
+	AlignCode16();
+
+	// Don't forget to zap the instruction cache!
+	FlushIcache();
+
+	b->originalSize = numInstructions;
+	return b->normalEntry;
+}
+// Debug aid: writes everything emitted so far (GetBasePtr() up to GetCodePtr()) to game:\jit.bin; does nothing if the file cannot be opened.
+void Jit::DumpJit() {
+	u32 len = (u32)GetCodePtr() - (u32)GetBasePtr();
+	FILE *fd = fopen("game:\\jit.bin", "wb");
+	if (!fd) return; // fopen() yields NULL on failure; handing that to fwrite()/fclose() would crash
+	fwrite(GetBasePtr(), len, 1, fd);
+	fclose(fd);
+}
+// Emits the code shared by every block (prologue, outer loop, dispatcher, bail-out/epilogue) and records the entry points other code branches to.
+void Jit::GenerateFixedCode() {
+	enterCode = AlignCode16();
+
+	INFO_LOG(HLE, "Base: %08x", (u32)Memory::base);
+	INFO_LOG(HLE, "enterCode: 0x%08p", enterCode);
+	INFO_LOG(HLE, "GetBasePtr: 0x%08p", GetBasePtr());
+
+#if 1
+	// Write Prologue (setup stack frame etc ...)
+	// Save Lr
+	MFLR(R12);
+
+	// Save regs
+	u32 regSize = 8; // 4 in 32bit system
+	u32 stackFrameSize = 32*32;//(35 - 12) * regSize;
+
+	for(int i = 14; i < 32; i ++) {
+		STD((PPCReg)i, R1, -((33 - i) * regSize));
+	}
+
+	// Save r12
+	STW(R12, R1, -0x8);
+
+	// allocate stack
+	STWU(R1, R1, -stackFrameSize);
+#endif
+
+	// Map fixed register
+	MOVI2R(BASEREG, (u32)Memory::base);
+	MOVI2R(CTXREG, (u32)mips_);
+	MOVI2R(CODEREG, (u32)GetBasePtr());
+
+	// Update downcount reg value from memory
+	RestoreDowncount(DCNTREG);
+
+	// SREG = mips->pc
+	MovFromPC(SREG);
+
+	// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInR3 ??
+	outerLoopPCInR0 = GetCodePtr();
+
+	// mips->pc = SREG
+	MovToPC(SREG);
+
+	// Keep current location
+	outerLoop = GetCodePtr();
+
+	// Jit loop
+	// {
+	// Save downcount reg value to memory
+	SaveDowncount(DCNTREG);
+	// Call CoreTiming::Advance() => update downcount
+	QuickCallFunction((void *)&CoreTiming::Advance);
+	// Update downcount reg value from memory
+	RestoreDowncount(DCNTREG);
+
+	// branch to skipToRealDispatch
+	FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
+
+	// Keep current location dispatcherCheckCoreState:
+	dispatcherCheckCoreState = GetCodePtr();
+
+	// The result of slice decrementation should be in flags if somebody jumped here
+	// IMPORTANT - We jump on negative, not carry!!!
+	// branch to bailCoreState: (jump if(what ??) negative )
+	//FixupBranch bailCoreState = B_CC(CC_MI); // BLT ???
+
+	FixupBranch bailCoreState = BLT(); // BLT ???
+
+	// SREG = coreState
+	MOVI2R(SREG, (u32)&coreState);
+	// ??? Compare coreState and CORE_RUNNING
+	LWZ(SREG, SREG); // SREG = *SREG
+	CMPLI(SREG, 0); // compare 0(CORE_RUNNING) and CR0
+
+	// branch to badCoreState: (jump if coreState != CORE_RUNNING)
+	FixupBranch badCoreState = BNE(); // B_CC(CC_NEQ)
+
+	// branch to skipToRealDispatch2:
+	FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time
+
+	// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInSREG ??
+	dispatcherPCInR0 = GetCodePtr();
+
+	// mips->pc = SREG
+	MovToPC(SREG);
+
+	// At this point : flags = EQ. Fine for the next check, no need to jump over it.
+	// label dispatcher:
+	dispatcher = GetCodePtr();
+
+	// {
+	// The result of slice decrementation should be in flags if somebody jumped here
+	// IMPORTANT - We jump on negative, not carry!!!
+	// label bail:
+	// arm B_CC(CC_MI);
+	FixupBranch bail = BLT();
+
+	// label skipToRealDispatch:
+	SetJumpTarget(skipToRealDispatch);
+
+	// label skipToRealDispatch2:
+	SetJumpTarget(skipToRealDispatch2);
+
+	// Keep current location
+	dispatcherNoCheck = GetCodePtr();
+
+	// read op
+	// R3 = mips->pc & Memory::MEMVIEW32_MASK
+	LWZ(R3, CTXREG, offsetof(MIPSState, pc));
+	MOVI2R(SREG, Memory::MEMVIEW32_MASK);
+	AND(R3, R3, SREG);
+
+	// R3 = memory::base[r3];
+	ADD(R3, BASEREG, R3);
+	MOVI2R(R0, 0);
+	LWBRX(R3, R3, R0); // R3 = op now !
+
+	// R4 = R3 & MIPS_EMUHACK_VALUE_MASK
+	MOVI2R(SREG, MIPS_EMUHACK_VALUE_MASK);
+	AND(R4, R3, SREG);
+
+	// R3 = R3 & MIPS_EMUHACK_MASK
+	ANDIS(R3, R3, (MIPS_EMUHACK_MASK>>16));
+
+	// compare, op == MIPS_EMUHACK_OPCODE
+	MOVI2R(SREG, MIPS_EMUHACK_OPCODE);
+	CMP(R3, SREG);
+
+	// Branch if func block not found
+	FixupBranch notfound = BNE();
+
+	// {
+	// R3 = R4 + GetBasePtr()
+	ADD(R3, R4, CODEREG);
+
+	MTCTR(R3);
+	BCTR();
+	// }
+
+	// label notfound:
+	SetJumpTarget(notfound);
+
+	//Ok, no block, let's jit
+	// Save downcount reg value to memory
+	SaveDowncount(DCNTREG);
+
+	// Exec JitAt => Compile block !
+	QuickCallFunction((void *)&JitAt);
+
+	// Update downcount reg value from memory
+	RestoreDowncount(DCNTREG);
+
+	// branch to dispatcherNoCheck:
+	B(dispatcherNoCheck); // no point in special casing this
+	// }
+
+	// label bail:
+	SetJumpTarget(bail);
+
+	// label bailCoreState:
+	SetJumpTarget(bailCoreState);
+
+	// Compare coreState and CORE_RUNNING
+	MOVI2R(SREG, (u32)&coreState);
+	LWZ(SREG, SREG); // SREG = *SREG => SREG = coreState
+	CMPLI(SREG, 0); // compare 0(CORE_RUNNING) and corestate
+
+	// branch to outerLoop if (coreState == CORE_RUNNING)
+	// arm: B_CC(CC_EQ, outerLoop);
+	//Break();
+	BEQ(outerLoop);
+	// }
+
+	// badCoreState label:
+	SetJumpTarget(badCoreState);
+
+	// Keep current location
+	breakpointBailout = GetCodePtr();
+
+	// mips->downcount = DCNTREG
+	SaveDowncount(DCNTREG);
+
+#if 1
+	// Write Epilogue (restore stack frame, return)
+	// free stack
+	ADDI(R1, R1, stackFrameSize);
+
+	// Restore regs
+	for(int i = 14; i < 32; i ++) {
+		LD((PPCReg)i, R1, -((33 - i) * regSize));
+	}
+
+	// recover r12 (LR saved register)
+	LWZ (R12, R1, -0x8);
+
+	// Restore Lr
+	MTLR(R12);
+
+	//BLR(); // NOTE(review): with the return disabled, nothing emitted after MTLR branches back to the caller of enterCode -- confirm this is intended
+#endif
+
+	// Don't forget to zap the instruction cache!
+	FlushIcache();
+}
+
+}
\ No newline at end of file
diff --git a/Core/MIPS/PPC/PpcCompAlu.cpp b/Core/MIPS/PPC/PpcCompAlu.cpp
new file mode 100644
index 000000000..c470b3d35
--- /dev/null
+++ b/Core/MIPS/PPC/PpcCompAlu.cpp
@@ -0,0 +1,54 @@
+#include "Common/ChunkFile.h"
+#include "../../Core.h"
+#include "../../CoreTiming.h"
+#include "../MIPS.h"
+#include "../MIPSCodeUtils.h"
+#include "../MIPSInt.h"
+#include "../MIPSTables.h"
+
+#include "PpcRegCache.h"
+#include "ppcEmitter.h"
+#include "PpcJit.h"
+
+#include
+
+using namespace PpcGen;
+
+extern volatile CoreState coreState;
+
+namespace MIPSComp
+{
+// No ALU op is compiled natively yet: every handler in this file defers to Comp_Generic.
+void Jit::Comp_IType(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_RType2(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_RType3(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_ShiftType(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_Allegrex(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_Allegrex2(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_MulDivType(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_Special3(u32 op) {
+	Comp_Generic(op);
+}
+
+}
\ No newline at end of file
diff --git a/Core/MIPS/PPC/PpcCompBranch.cpp b/Core/MIPS/PPC/PpcCompBranch.cpp
new file mode 100644
index 000000000..f797e46a2
--- /dev/null
+++ b/Core/MIPS/PPC/PpcCompBranch.cpp
@@ -0,0 +1,452 @@
+#include "Common/ChunkFile.h"
+#include "../../Core.h"
+#include "../../CoreTiming.h"
+#include "../MIPS.h"
+#include "../MIPSCodeUtils.h"
+#include "../MIPSInt.h"
+#include "../MIPSTables.h"
+
+#include "Core/Reporting.h"
+#include "Core/HLE/HLE.h"
+
+#include "PpcRegCache.h"
+#include "ppcEmitter.h"
+#include "PpcJit.h"
+
+#include
+
+
+#define _RS ((op>>21) & 0x1F)
+#define _RT ((op>>16) & 0x1F)
+#define _RD ((op>>11) & 0x1F)
+#define _FS ((op>>11) & 0x1F)
+#define _FT ((op>>16) & 0x1F)
+#define _FD ((op>>6 ) & 0x1F)
+#define _POS ((op>>6 ) & 0x1F)
+#define _SIZE ((op>>11 ) & 0x1F)
+
+#define LOOPOPTIMIZATION 0
+
+// We can disable nice delay slots.
+#define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
+// #define CONDITIONAL_NICE_DELAYSLOT ;   <- alternative that keeps delaySlotIsNice as computed; the active definition above forces it to false
+
+#define SHOW_JS_COMPILER_PC { printf("js.compilerPC: %08x\n", js.compilerPC); }
+
+#define BRANCH_COMPILE_LOG { printf("JIT(%8x): %s => %d - %08x\n", (u32)GetCodePtr() ,__FUNCTION__, cc, js.compilerPC); }
+
+using namespace MIPSAnalyst;
+
+using namespace PpcGen;
+
+namespace MIPSComp
+{
+// Emits a compare of rs with rt (or with zero when one side is a known zero immediate), then a conditional branch on cc that jumps over the "taken" exit.
+void Jit::BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely)
+{
+	if (js.inDelaySlot) {
+		ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
+		return;
+	}
+	int offset = (signed short)(op&0xFFFF)<<2;
+	int rt = _RT;
+	int rs = _RS;
+	u32 targetAddr = js.compilerPC + offset + 4;
+
+	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);
+	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
+	CONDITIONAL_NICE_DELAYSLOT;
+	if (!likely && delaySlotIsNice)
+		CompileDelaySlot(DELAYSLOT_NICE);
+
+	if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
+	{
+		gpr.MapReg(rs);
+		CMPLI(gpr.R(rs), 0);
+	}
+	else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0 && (cc == _BEQ || cc == _BNE)) // only these are easily 'flippable'
+	{
+		gpr.MapReg(rt);
+		CMPLI(gpr.R(rt),0);
+	}
+	else
+	{
+		gpr.MapInIn(rs, rt);
+		CMP(gpr.R(rs), gpr.R(rt));
+	}
+
+	//if (js.compilerPC == 0x089001c4) {
+	// Break();
+	// Break();
+	//}
+
+	PpcGen::FixupBranch ptr;
+	if (!likely)
+	{
+		if (!delaySlotIsNice)
+			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
+		else
+			FlushAll();
+		ptr = B_Cond(cc);
+	}
+	else
+	{
+		FlushAll();
+		ptr = B_Cond(cc);
+		CompileDelaySlot(DELAYSLOT_FLUSH);
+	}
+
+	INFO_LOG(CPU, "targetAddr: %08x,js.compilerPC: %08x offset: %08x, op: %08x\n", targetAddr, js.compilerPC, offset, op);
+
+	// Take the branch
+	WriteExit(targetAddr, 0);
+
+	// !cond
+	SetJumpTarget(ptr);
+
+	// Not taken
+	WriteExit(js.compilerPC+8, 1);
+
+	js.compiling = false;
+}
+
+// Same shape as BranchRSRTComp but always compares rs with zero; with andLink the taken path also stores js.compilerPC + 8 at CTXREG + MIPS_REG_RA * 4.
+void Jit::BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely)
+{
+	if (js.inDelaySlot) {
+		ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
+		return;
+	}
+	int offset = (signed short)(op&0xFFFF)<<2;
+	int rs = _RS;
+	u32 targetAddr = js.compilerPC + offset + 4;
+
+	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
+	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
+	CONDITIONAL_NICE_DELAYSLOT;
+	if (!likely && delaySlotIsNice)
+		CompileDelaySlot(DELAYSLOT_NICE);
+
+	gpr.MapReg(rs);
+	CMPLI(gpr.R(rs), 0); // NOTE(review): CMPLI looks like the unsigned (logical) compare, yet callers pass signed conditions (blez/bgtz/bltz/bgez) -- confirm a signed compare is not needed
+
+	PpcGen::FixupBranch ptr;
+	if (!likely)
+	{
+		if (!delaySlotIsNice)
+			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
+		else
+			FlushAll();
+		ptr = B_Cond(cc);
+	}
+	else
+	{
+		FlushAll();
+		ptr = B_Cond(cc);
+		CompileDelaySlot(DELAYSLOT_FLUSH);
+	}
+
+	// Take the branch
+	if (andLink)
+	{
+		//Break();
+		MOVI2R(SREG, js.compilerPC + 8);
+		STW(SREG, CTXREG, MIPS_REG_RA * 4);
+	}
+
+	WriteExit(targetAddr, 0);
+
+	SetJumpTarget(ptr);
+	// Not taken
+	WriteExit(js.compilerPC + 8, 1);
+
+	js.compiling = false;
+}
+
+void Jit::Comp_RelBranch(u32 op) {
+	// The CC flags here should be the opposite of the actual branch because they skip the branching action.
+	switch (op>>26)
+	{
+	case 4: BranchRSRTComp(op, _BNE, false); break;//beq
+	case 5: BranchRSRTComp(op, _BEQ, false); break;//bne
+
+	case 6: BranchRSZeroComp(op, _BGT, false, false); break;//blez
+	case 7: BranchRSZeroComp(op, _BLE, false, false); break;//bgtz
+
+	case 20: BranchRSRTComp(op, _BNE, true); break;//beql
+	case 21: BranchRSRTComp(op, _BEQ, true); break;//bnel
+
+	case 22: BranchRSZeroComp(op, _BGT, false, true); break;//blezl
+	case 23: BranchRSZeroComp(op, _BLE, false, true); break;//bgtzl
+
+	default:
+		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
+		break;
+	}
+	js.compiling = false;
+}
+
+void Jit::Comp_RelBranchRI(u32 op) {
+	switch ((op >> 16) & 0x1F)
+	{
+	case 0: BranchRSZeroComp(op, _BGE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
+	case 1: BranchRSZeroComp(op, _BLT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
+	case 2: BranchRSZeroComp(op, _BGE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
+	case 3: BranchRSZeroComp(op, _BLT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
+	case 16: BranchRSZeroComp(op, _BGE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal
+	case 17: BranchRSZeroComp(op, _BLT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
+	case 18: BranchRSZeroComp(op, _BGE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
+	case 19: BranchRSZeroComp(op, _BLT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
+	default:
+		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
+		break;
+	}
+	js.compiling = false;
+}
+
+
+// If likely is set, discard the branch slot if NOT taken.
+void Jit::BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
+{
+	if (js.inDelaySlot) {
+		ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
+		return;
+	}
+	int offset = (signed short)(op & 0xFFFF) << 2;
+	u32 targetAddr = js.compilerPC + offset + 4;
+
+	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
+	bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
+	CONDITIONAL_NICE_DELAYSLOT;
+	if (!likely && delaySlotIsNice)
+		CompileDelaySlot(DELAYSLOT_NICE);
+
+	FlushAll();
+
+
+	DebugBreak(); // not made ! The fpcond test below is commented out, so this function emits no compare before B_Cond(cc).
+
+	/*
+	LWZ(SREG, CTXREG, offsetof(MIPSState, fpcond));
+	//TST(SREG, Operand2(1, TYPE_IMM));
+	// i don't know the equivalent so ...
+	CMP(
+
+	*/
+	PpcGen::FixupBranch ptr;
+	if (!likely)
+	{
+		if (!delaySlotIsNice)
+			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
+		ptr = B_Cond(cc);
+	}
+	else
+	{
+		ptr = B_Cond(cc);
+		CompileDelaySlot(DELAYSLOT_FLUSH);
+	}
+
+	// Take the branch
+	WriteExit(targetAddr, 0);
+
+	SetJumpTarget(ptr);
+	// Not taken
+	WriteExit(js.compilerPC + 8, 1);
+	js.compiling = false;
+}
+
+void Jit::Comp_FPUBranch(u32 op) {
+	switch((op >> 16) & 0x1f)
+	{
+	case 0: BranchFPFlag(op, _BNE, false); break; // bc1f
+	case 1: BranchFPFlag(op, _BEQ, false); break; // bc1t
+	case 2: BranchFPFlag(op, _BNE, true); break; // bc1fl
+	case 3: BranchFPFlag(op, _BEQ, true); break; // bc1tl
+	default:
+		_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
+		break;
+	}
+	js.compiling = false;
+}
+
+
+// If likely is set, discard the branch slot if NOT taken.
+void Jit::BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
+{
+	if (js.inDelaySlot) {
+		ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
+		return;
+	}
+	int offset = (signed short)(op & 0xFFFF) << 2;
+	u32 targetAddr = js.compilerPC + offset + 4;
+
+	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
+
+	bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
+	CONDITIONAL_NICE_DELAYSLOT;
+	if (!likely && delaySlotIsNice)
+		CompileDelaySlot(DELAYSLOT_NICE);
+
+	FlushAll();
+
+	int imm3 = (op >> 18) & 7; // only referenced by the disabled code below
+
+	/*
+	MOVI2R(R0, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
+	LWZ(R0, R0, Operand2(0, TYPE_IMM));
+	TST(R0, Operand2(1 << imm3, TYPE_IMM));
+	*/
+
+
+	DebugBreak(); // not made ! The VFPU_CTRL_CC test above is commented out, so this function emits no compare before B_Cond(cc).
+
+	PpcGen::FixupBranch ptr;
+	js.inDelaySlot = true;
+	if (!likely)
+	{
+		if (!delaySlotIsNice)
+			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
+		ptr = B_Cond(cc);
+	}
+	else
+	{
+		ptr = B_Cond(cc);
+		CompileDelaySlot(DELAYSLOT_FLUSH);
+	}
+	js.inDelaySlot = false;
+
+	// Take the branch
+	WriteExit(targetAddr, 0);
+
+	SetJumpTarget(ptr);
+
+	// Not taken
+	WriteExit(js.compilerPC + 8, 1);
+	js.compiling = false;
+}
+
+void Jit::Comp_VBranch(u32 op) {
+	switch ((op >> 16) & 3)
+	{
+	case 0: BranchVFPUFlag(op, _BNE, false); break; // bvf
+	case 1: BranchVFPUFlag(op, _BEQ, false); break; // bvt
+	case 2: BranchVFPUFlag(op, _BNE, true); break; // bvfl
+	case 3: BranchVFPUFlag(op, _BEQ, true); break; // bvtl
+	}
+	js.compiling = false;
+}
+// Compiles j/jal: the target keeps the top 4 bits of js.compilerPC and takes the rest from the 26-bit field shifted left by 2.
+void Jit::Comp_Jump(u32 op) {
+	if (js.inDelaySlot) {
+		ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
+		return;
+	}
+	u32 off = ((op & 0x03FFFFFF) << 2);
+	u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
+
+	/*if (op == 0x0a240070) {
+		Break();
+	}*/
+
+	switch (op >> 26)
+	{
+	case 2: //j
+		CompileDelaySlot(DELAYSLOT_NICE);
+		FlushAll();
+		WriteExit(targetAddr, 0);
+		break;
+
+	case 3: //jal
+		//Break();
+		gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
+		MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
+		CompileDelaySlot(DELAYSLOT_NICE);
+		FlushAll();
+		WriteExit(targetAddr, 0);
+		break;
+
+	default:
+		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
+		break;
+	}
+	js.compiling = false;
+}
+// Compiles jr/jalr: copies rs into FLAGREG, compiles the delay slot, then exits through WriteExitDestInR(FLAGREG).
+void Jit::Comp_JumpReg(u32 op) {
+	if (js.inDelaySlot) {
+		ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
+		return;
+	}
+	int rs = _RS;
+
+	u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
+	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
+	CONDITIONAL_NICE_DELAYSLOT;
+
+	if (IsSyscall(delaySlotOp)) {
+		gpr.MapReg(rs);
+		PPCReg mRs = gpr.R(rs);
+		MR(FLAGREG, mRs);
+		MovToPC(FLAGREG); // For syscall to be able to return.
+		CompileDelaySlot(DELAYSLOT_FLUSH);
+		return; // Syscall wrote exit code.
+	} else if (delaySlotIsNice) {
+		CompileDelaySlot(DELAYSLOT_NICE);
+		gpr.MapReg(rs);
+		PPCReg mRs = gpr.R(rs);
+		MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
+		FlushAll();
+	} else {
+		// Delay slot
+		gpr.MapReg(rs);
+		PPCReg mRs = gpr.R(rs);
+		MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
+		CompileDelaySlot(DELAYSLOT_NICE);
+		FlushAll();
+	}
+
+	switch (op & 0x3f)
+	{
+	case 8: //jr
+		break;
+	case 9: //jalr
+		// Stores js.compilerPC + 8 at CTXREG + MIPS_REG_RA * 4. NOTE(review): the rd field of the op is never read here -- confirm jalr with another link register is not needed.
+		//Break();
+		MOVI2R(SREG, js.compilerPC + 8);
+		STW(SREG, CTXREG, MIPS_REG_RA * 4);
+		break;
+	default:
+		_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
+		break;
+	}
+
+	WriteExitDestInR(FLAGREG);
+	js.compiling = false;
+}
+// Flushes registers, settles the downcount, emits a call to CallSyscall with op in R3, then leaves through WriteSyscallExit().
+void Jit::Comp_Syscall(u32 op) {
+	FlushAll();
+
+	// If we're in a delay slot, this is off by one.
+	const int offset = js.inDelaySlot ? -1 : 0;
+	WriteDownCount(offset);
+	js.downcountAmount = -offset;
+
+	// CallSyscall(op);
+	MOVI2R(R3, op);
+	SaveDowncount(DCNTREG);
+	QuickCallFunction((void *)&CallSyscall);
+	RestoreDowncount(DCNTREG);
+
+	WriteSyscallExit();
+	js.compiling = false;
+}
+// Runs the op through Comp_Generic, then ends the block with the same exit a syscall uses.
+void Jit::Comp_Break(u32 op) {
+	Comp_Generic(op);
+	WriteSyscallExit();
+	js.compiling = false;
+}
+
+
+}
\ No newline at end of file
diff --git a/Core/MIPS/PPC/PpcCompFpu.cpp b/Core/MIPS/PPC/PpcCompFpu.cpp
new file mode 100644
index 000000000..f4604e0b6
--- /dev/null
+++ b/Core/MIPS/PPC/PpcCompFpu.cpp
@@ -0,0 +1,42 @@
+#include "Common/ChunkFile.h"
+#include "../../Core.h"
+#include "../../CoreTiming.h"
+#include "../MIPS.h"
+#include "../MIPSCodeUtils.h"
+#include "../MIPSInt.h"
+#include "../MIPSTables.h"
+
+#include "PpcRegCache.h"
+#include "ppcEmitter.h"
+#include "PpcJit.h"
+
+#include
+
+using namespace PpcGen;
+
+extern volatile CoreState coreState;
+
+namespace MIPSComp
+{
+// No FPU op is compiled natively yet: every handler in this file defers to Comp_Generic.
+void Jit::Comp_FPULS(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_FPUComp(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_FPU3op(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_FPU2op(u32 op) {
+	Comp_Generic(op);
+}
+
+void Jit::Comp_mxc1(u32 op) {
+	Comp_Generic(op);
+}
+
+}
\ No newline at end of file
diff --git a/Core/MIPS/PPC/PpcCompLoadStore.cpp b/Core/MIPS/PPC/PpcCompLoadStore.cpp
new file mode 100644
index 000000000..f1074bd3a
--- /dev/null
+++ b/Core/MIPS/PPC/PpcCompLoadStore.cpp
@@ -0,0 +1,24 @@
+#include "Common/ChunkFile.h"
+#include "../../Core.h"
+#include "../../CoreTiming.h"
+#include "../MIPS.h"
+#include "../MIPSCodeUtils.h"
+#include "../MIPSInt.h"
+#include "../MIPSTables.h"
+
+#include "PpcRegCache.h"
+#include "ppcEmitter.h"
+#include "PpcJit.h"
+
+#include
+
+using namespace PpcGen;
+
+namespace MIPSComp
+{
+// Loads and stores are not compiled natively yet: the handler defers to Comp_Generic.
+void Jit::Comp_ITypeMem(u32 op) {
+	Comp_Generic(op);
+}
+
+}
\ No newline at end of file
diff --git a/Core/MIPS/PPC/PpcCompVFPU.cpp b/Core/MIPS/PPC/PpcCompVFPU.cpp
new file mode 100644
index 000000000..ba3c2acfe --- /dev/null +++ b/Core/MIPS/PPC/PpcCompVFPU.cpp @@ -0,0 +1,139 @@ +#include "Common/ChunkFile.h" +#include "../../Core.h" +#include "../../CoreTiming.h" +#include "../MIPS.h" +#include "../MIPSCodeUtils.h" +#include "../MIPSInt.h" +#include "../MIPSTables.h" + +#include "PpcRegCache.h" +#include "ppcEmitter.h" +#include "PpcJit.h" + +#include + +using namespace PpcGen; + + +namespace MIPSComp +{ +void Jit::Comp_SV(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_SVQ(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VPFX(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VVectorInit(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VMatrixInit(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VDot(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VecDo3(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VV2Op(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Mftv(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vmtvc(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vmmov(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VScl(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vmmul(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vmscl(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vtfm(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VHdp(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VCrs(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VDet(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vi2x(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vx2i(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vf2i(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vi2f(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vcst(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vhoriz(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VRot(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_VIdt(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vcmp(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vcmov(u32 op) { + 
Comp_Generic(op); +} + +void Jit::Comp_Viim(u32 op) { + Comp_Generic(op); +} + +void Jit::Comp_Vfim(u32 op) { + Comp_Generic(op); +} +} \ No newline at end of file diff --git a/Core/MIPS/PPC/PpcJit.cpp b/Core/MIPS/PPC/PpcJit.cpp index 763141fdd..e020d4db9 100644 --- a/Core/MIPS/PPC/PpcJit.cpp +++ b/Core/MIPS/PPC/PpcJit.cpp @@ -1,76 +1,229 @@ +#include "Common/ChunkFile.h" +#include "../../Core.h" +#include "../../CoreTiming.h" +#include "../MIPS.h" +#include "../MIPSCodeUtils.h" +#include "../MIPSInt.h" +#include "../MIPSTables.h" + +#include "PpcRegCache.h" +#include "ppcEmitter.h" #include "PpcJit.h" +#include + +using namespace PpcGen; + +extern volatile CoreState coreState; + namespace MIPSComp { - Jit * jit=NULL; - void Jit::Comp_Generic(u32 op) { +static u32 delaySlotFlagsValue; +/** we use a flag non volatile flag (FLAGREG)r18, no need to save it **/ +void Jit::CompileDelaySlot(int flags) +{ + // preserve flag around the delay slot! Maybe this is not always necessary on ARM where + // we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the + // delay slot, we're screwed. + if (flags & DELAYSLOT_SAFE) { + // Save flags register + //Break(); + MOVI2R(SREG, (u32)&delaySlotFlagsValue); + STW(FLAGREG, SREG); } + + js.inDelaySlot = true; + u32 op = Memory::Read_Instruction(js.compilerPC + 4); + MIPSCompileOp(op); + js.inDelaySlot = false; + + if (flags & DELAYSLOT_FLUSH) + FlushAll(); + + if (flags & DELAYSLOT_SAFE) { + // Restore flags register + //Break(); + MOVI2R(SREG, (u32)&delaySlotFlagsValue); + LWZ(FLAGREG, SREG); + } +} + +void Jit::Compile(u32 em_address) +{ + if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) + { + ClearCache(); + } + + int block_num = blocks.AllocateBlock(em_address); + JitBlock *b = blocks.GetBlock(block_num); + DoJit(em_address, b); + blocks.FinalizeBlock(block_num, jo.enableBlocklink); + + // Drat. The VFPU hit an uneaten prefix at the end of a block. 
+ if (js.startDefaultPrefix && js.MayHavePrefix()) + { + js.startDefaultPrefix = false; + // Our assumptions are all wrong so it's clean-slate time. + ClearCache(); + + // Let's try that one more time. We won't get back here because we toggled the value. + Compile(em_address); + } +} + +void Jit::MovFromPC(PPCReg r) { + LWZ(r, CTXREG, offsetof(MIPSState, pc)); +} + +void Jit::MovToPC(PPCReg r) { + STW(r, CTXREG, offsetof(MIPSState, pc)); +} + +void Jit::SaveDowncount(PPCReg r) { + STW(r, CTXREG, offsetof(MIPSState, downcount)); +} + +void Jit::RestoreDowncount(PPCReg r) { + LWZ(r, CTXREG, offsetof(MIPSState, downcount)); +} + +void Jit::WriteDownCount(int offset) +{ + // don't know if the result is correct + int theDowncount = js.downcountAmount + offset; + if (jo.downcountInRegister) { + // DCNTREG = DCNTREG - theDowncount; + MOVI2R(SREG, theDowncount); + SUBF(DCNTREG, SREG, DCNTREG); + STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount)); + } else { + // DCNTREG = MIPSState->downcount - theDowncount; + MOVI2R(SREG, theDowncount); + LWZ(DCNTREG, CTXREG, offsetof(MIPSState, downcount)); + SUBF(DCNTREG, SREG, DCNTREG); + STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount)); + } +} + +void Jit::Comp_Generic(u32 op) { + // basic jit !! + MIPSInterpretFunc func = MIPSGetInterpretFunc(op); + if (func) + { + // Save mips PC and cycles + SaveDowncount(DCNTREG); + + //// Set func param + //if (op == 0x00009021) + // Break(); + + MOVI2R(R3, op); + + QuickCallFunction((void *)func); + + // restore pc and cycles + RestoreDowncount(DCNTREG); + } + // Might have eaten prefixes, hard to tell... 
+ if ((MIPSGetInfo(op) & IS_VFPU) != 0) + js.PrefixStart(); +} - void Jit::EatInstruction(u32 op){} - void Jit::Comp_RunBlock(u32 op){} - void Jit::Comp_ITypeMem(u32 op){} +void Jit::EatInstruction(u32 op) { + u32 info = MIPSGetInfo(op); + _dbg_assert_msg_(JIT, !(info & DELAYSLOT), "Never eat a branch op."); + _dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot."); - void Jit::Comp_RelBranch(u32 op){} - void Jit::Comp_RelBranchRI(u32 op){} - void Jit::Comp_FPUBranch(u32 op){} - void Jit::Comp_FPULS(u32 op){} - void Jit::Comp_FPUComp(u32 op){} - void Jit::Comp_Jump(u32 op){} - void Jit::Comp_JumpReg(u32 op){} - void Jit::Comp_Syscall(u32 op){} - void Jit::Comp_Break(u32 op){} + js.compilerPC += 4; + js.downcountAmount += MIPSGetInstructionCycleEstimate(op); +} - void Jit::Comp_IType(u32 op){} - void Jit::Comp_RType2(u32 op){} - void Jit::Comp_RType3(u32 op){} - void Jit::Comp_ShiftType(u32 op){} - void Jit::Comp_Allegrex(u32 op){} - void Jit::Comp_Allegrex2(u32 op){} - void Jit::Comp_VBranch(u32 op){} - void Jit::Comp_MulDivType(u32 op){} - void Jit::Comp_Special3(u32 op){} +void Jit::Comp_RunBlock(u32 op) { + // This shouldn't be necessary, the dispatcher should catch us before we get here. 
+ ERROR_LOG(DYNA_REC, "Comp_RunBlock should never be reached!"); +} - void Jit::Comp_FPU3op(u32 op){} - void Jit::Comp_FPU2op(u32 op){} - void Jit::Comp_mxc1(u32 op){} +void Jit::Comp_DoNothing(u32 op) { - void Jit::Comp_DoNothing(u32 op){} +} - void Jit::Comp_SV(u32 op){} - void Jit::Comp_SVQ(u32 op){} - void Jit::Comp_VPFX(u32 op){} - void Jit::Comp_VVectorInit(u32 op){} - void Jit::Comp_VMatrixInit(u32 op){} - void Jit::Comp_VDot(u32 op){} - void Jit::Comp_VecDo3(u32 op){} - void Jit::Comp_VV2Op(u32 op){} - void Jit::Comp_Mftv(u32 op){} - void Jit::Comp_Vmtvc(u32 op){} - void Jit::Comp_Vmmov(u32 op){} - void Jit::Comp_VScl(u32 op){} - void Jit::Comp_Vmmul(u32 op){} - void Jit::Comp_Vmscl(u32 op){} - void Jit::Comp_Vtfm(u32 op){} - void Jit::Comp_VHdp(u32 op){} - void Jit::Comp_VCrs(u32 op){} - void Jit::Comp_VDet(u32 op){} - void Jit::Comp_Vi2x(u32 op){} - void Jit::Comp_Vx2i(u32 op){} - void Jit::Comp_Vf2i(u32 op){} - void Jit::Comp_Vi2f(u32 op){} - void Jit::Comp_Vcst(u32 op){} - void Jit::Comp_Vhoriz(u32 op){} - void Jit::Comp_VRot(u32 op){} - void Jit::Comp_VIdt(u32 op){} +void Jit::FlushAll() +{ + gpr.FlushAll(); + //fpr.FlushAll(); + //FlushPrefixV(); +} +void Jit::ClearCache() { + blocks.Clear(); + ClearCodeSpace(); + GenerateFixedCode(); +} + +void Jit::ClearCacheAt(u32 em_address) { + ClearCache(); +} + +Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo),mips_(mips) +{ + blocks.Init(); + gpr.SetEmitter(this); + AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards. 
+ GenerateFixedCode(); + + js.startDefaultPrefix = true; +} + +void Jit::RunLoopUntil(u64 globalticks) { + // Run the compiled code - void Jit::Comp_Vcmp(u32 op){}; - void Jit::Comp_Vcmov(u32 op){}; - void Jit::Comp_Viim(u32 op){}; - void Jit::Comp_Vfim(u32 op){}; + INFO_LOG(HLE, "enterCode: %08p", enterCode); + + _alloca(8*1024); + + ((void (*)())enterCode)(); +} - void Jit::ClearCache(){} - void Jit::ClearCacheAt(u32 em_address){} + +// IDEA - could have a WriteDualExit that takes two destinations and two condition flags, +// and just have conditional that set PC "twice". This only works when we fall back to dispatcher +// though, as we need to have the SUBS flag set in the end. So with block linking in the mix, +// I don't think this gives us that much benefit. +void Jit::WriteExit(u32 destination, int exit_num) +{ + WriteDownCount(); + //If nobody has taken care of this yet (this can be removed when all branches are done) + JitBlock *b = js.curBlock; + b->exitAddress[exit_num] = destination; + b->exitPtrs[exit_num] = GetWritableCodePtr(); + + // Link opportunity! + int block = blocks.GetBlockNumberFromStartAddress(destination); + if (block >= 0 && jo.enableBlocklink) { + // It exists! Joy of joy! + B(blocks.GetBlock(block)->checkedEntry); + b->linkStatus[exit_num] = true; + } else { + MOVI2R(SREG, destination); + B((const void *)dispatcherPCInR0); + } +} + +void Jit::WriteExitDestInR(PPCReg Reg) +{ + //Break(); + MovToPC(Reg); + WriteDownCount(); + // TODO: shouldn't need an indirect branch here... 
+ B((const void *)dispatcher); +} + +void Jit::WriteSyscallExit() +{ + WriteDownCount(); + B((const void *)dispatcherCheckCoreState); +} } \ No newline at end of file diff --git a/Core/MIPS/PPC/PpcJit.h b/Core/MIPS/PPC/PpcJit.h index 7d6aafb69..51e079d8d 100644 --- a/Core/MIPS/PPC/PpcJit.h +++ b/Core/MIPS/PPC/PpcJit.h @@ -19,88 +19,260 @@ #include "../../../Globals.h" +#include "Core/MIPS/JitCommon/JitBlockCache.h" +#include "Core/MIPS/PPC/PpcRegCache.h" + +#include "Core/MIPS/MIPS.h" +#include + namespace MIPSComp { -class Jit -{ -public: - // Compiled ops should ignore delay slots - // the compiler will take care of them by itself - // OR NOT - void Comp_Generic(u32 op); - - void EatInstruction(u32 op); - void Comp_RunBlock(u32 op); - - // TODO: Eat VFPU prefixes here. - void EatPrefix() { } + struct PpcJitOptions + { + PpcJitOptions() + { + enableBlocklink = true; + downcountInRegister = true; + } - // Ops - void Comp_ITypeMem(u32 op); + bool enableBlocklink; + bool downcountInRegister; + }; - void Comp_RelBranch(u32 op); - void Comp_RelBranchRI(u32 op); - void Comp_FPUBranch(u32 op); - void Comp_FPULS(u32 op); - void Comp_FPUComp(u32 op); - void Comp_Jump(u32 op); - void Comp_JumpReg(u32 op); - void Comp_Syscall(u32 op); - void Comp_Break(u32 op); + struct PpcJitState + { + enum PrefixState + { + PREFIX_UNKNOWN = 0x00, + PREFIX_KNOWN = 0x01, + PREFIX_DIRTY = 0x10, + PREFIX_KNOWN_DIRTY = 0x11, + }; - void Comp_IType(u32 op); - void Comp_RType2(u32 op); - void Comp_RType3(u32 op); - void Comp_ShiftType(u32 op); - void Comp_Allegrex(u32 op); - void Comp_Allegrex2(u32 op); - void Comp_VBranch(u32 op); - void Comp_MulDivType(u32 op); - void Comp_Special3(u32 op); + u32 compilerPC; + u32 blockStart; + bool cancel; + bool inDelaySlot; + int downcountAmount; + bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block + JitBlock *curBlock; - void Comp_FPU3op(u32 op); - void Comp_FPU2op(u32 op); - void Comp_mxc1(u32 op); + // 
VFPU prefix magic + bool startDefaultPrefix; + u32 prefixS; + u32 prefixT; + u32 prefixD; + PrefixState prefixSFlag; + PrefixState prefixTFlag; + PrefixState prefixDFlag; + void PrefixStart() { + if (startDefaultPrefix) { + EatPrefix(); + } else { + PrefixUnknown(); + } + } + void PrefixUnknown() { + prefixSFlag = PREFIX_UNKNOWN; + prefixTFlag = PREFIX_UNKNOWN; + prefixDFlag = PREFIX_UNKNOWN; + } + bool MayHavePrefix() const { + if (HasUnknownPrefix()) { + return true; + } else if (prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0) { + return true; + } else if (VfpuWriteMask() != 0) { + return true; + } - void Comp_DoNothing(u32 op); + return false; + } + bool HasUnknownPrefix() const { + if (!(prefixSFlag & PREFIX_KNOWN) || !(prefixTFlag & PREFIX_KNOWN) || !(prefixDFlag & PREFIX_KNOWN)) { + return true; + } + return false; + } + bool HasNoPrefix() const { + return (prefixDFlag & PREFIX_KNOWN) && (prefixSFlag & PREFIX_KNOWN) && (prefixTFlag & PREFIX_KNOWN) && (prefixS == 0xE4 && prefixT == 0xE4 && prefixD == 0); + } - void Comp_SV(u32 op); - void Comp_SVQ(u32 op); - void Comp_VPFX(u32 op); - void Comp_VVectorInit(u32 op); - void Comp_VMatrixInit(u32 op); - void Comp_VDot(u32 op); - void Comp_VecDo3(u32 op); - void Comp_VV2Op(u32 op); - void Comp_Mftv(u32 op); - void Comp_Vmtvc(u32 op); - void Comp_Vmmov(u32 op); - void Comp_VScl(u32 op); - void Comp_Vmmul(u32 op); - void Comp_Vmscl(u32 op); - void Comp_Vtfm(u32 op); - void Comp_VHdp(u32 op); - void Comp_VCrs(u32 op); - void Comp_VDet(u32 op); - void Comp_Vi2x(u32 op); - void Comp_Vx2i(u32 op); - void Comp_Vf2i(u32 op); - void Comp_Vi2f(u32 op); - void Comp_Vcst(u32 op); - void Comp_Vhoriz(u32 op); - void Comp_VRot(u32 op); - void Comp_VIdt(u32 op); - void Comp_Vcmp(u32 op); - void Comp_Vcmov(u32 op); - void Comp_Viim(u32 op); - void Comp_Vfim(u32 op); + void EatPrefix() { + if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) { + prefixSFlag = PREFIX_KNOWN_DIRTY; + prefixS = 0xE4; + } + if ((prefixTFlag & 
PREFIX_KNOWN) == 0 || prefixT != 0xE4) { + prefixTFlag = PREFIX_KNOWN_DIRTY; + prefixT = 0xE4; + } + if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || VfpuWriteMask() != 0) { + prefixDFlag = PREFIX_KNOWN_DIRTY; + prefixD = 0x0; + } + } + u8 VfpuWriteMask() const { + _assert_(prefixDFlag & PREFIX_KNOWN); + return (prefixD >> 8) & 0xF; + } + bool VfpuWriteMask(int i) const { + _assert_(prefixDFlag & PREFIX_KNOWN); + return (prefixD >> (8 + i)) & 1; + } + }; - void ClearCache(); - void ClearCacheAt(u32 em_address); -}; -typedef void (Jit::*MIPSCompileFunc)(u32 opcode); + enum CompileDelaySlotFlags + { + // Easy, nothing extra. + DELAYSLOT_NICE = 0, + // Flush registers after delay slot. + DELAYSLOT_FLUSH = 1, + // Preserve flags. + DELAYSLOT_SAFE = 2, + // Flush registers after and preserve flags. + DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE, + }; + + class Jit: public PpcGen::PPCXCodeBlock + { + protected: + JitBlockCache blocks; + public: + Jit(MIPSState *mips); + + // Compiled ops should ignore delay slots + // the compiler will take care of them by itself + // OR NOT + void Comp_Generic(u32 op); + + void EatInstruction(u32 op); + void Comp_RunBlock(u32 op); + + // TODO: Eat VFPU prefixes here. 
+ void EatPrefix() { } + + // Ops + void Comp_ITypeMem(u32 op); + + void Comp_RelBranch(u32 op); + void Comp_RelBranchRI(u32 op); + void Comp_FPUBranch(u32 op); + void Comp_FPULS(u32 op); + void Comp_FPUComp(u32 op); + void Comp_Jump(u32 op); + void Comp_JumpReg(u32 op); + void Comp_Syscall(u32 op); + void Comp_Break(u32 op); + + void Comp_IType(u32 op); + void Comp_RType2(u32 op); + void Comp_RType3(u32 op); + void Comp_ShiftType(u32 op); + void Comp_Allegrex(u32 op); + void Comp_Allegrex2(u32 op); + void Comp_VBranch(u32 op); + void Comp_MulDivType(u32 op); + void Comp_Special3(u32 op); + + void Comp_FPU3op(u32 op); + void Comp_FPU2op(u32 op); + void Comp_mxc1(u32 op); + + void Comp_DoNothing(u32 op); + + void Comp_SV(u32 op); + void Comp_SVQ(u32 op); + void Comp_VPFX(u32 op); + void Comp_VVectorInit(u32 op); + void Comp_VMatrixInit(u32 op); + void Comp_VDot(u32 op); + void Comp_VecDo3(u32 op); + void Comp_VV2Op(u32 op); + void Comp_Mftv(u32 op); + void Comp_Vmtvc(u32 op); + void Comp_Vmmov(u32 op); + void Comp_VScl(u32 op); + void Comp_Vmmul(u32 op); + void Comp_Vmscl(u32 op); + void Comp_Vtfm(u32 op); + void Comp_VHdp(u32 op); + void Comp_VCrs(u32 op); + void Comp_VDet(u32 op); + void Comp_Vi2x(u32 op); + void Comp_Vx2i(u32 op); + void Comp_Vf2i(u32 op); + void Comp_Vi2f(u32 op); + void Comp_Vcst(u32 op); + void Comp_Vhoriz(u32 op); + void Comp_VRot(u32 op); + void Comp_VIdt(u32 op); + void Comp_Vcmp(u32 op); + void Comp_Vcmov(u32 op); + void Comp_Viim(u32 op); + void Comp_Vfim(u32 op); + + + // Utility compilation functions + void BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely); + void BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely); + void BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely); + void BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely); + + // flush regs + void FlushAll(); + + void WriteDownCount(int offset = 0); + void MovFromPC(PpcGen::PPCReg r); + void 
MovToPC(PpcGen::PPCReg r); + + void SaveDowncount(PpcGen::PPCReg r); + void RestoreDowncount(PpcGen::PPCReg r); + + void WriteExit(u32 destination, int exit_num); + void WriteExitDestInR(PPCReg Reg); + void WriteSyscallExit(); + + void ClearCache(); + void ClearCacheAt(u32 em_address); + + void RunLoopUntil(u64 globalticks); + void GenerateFixedCode(); + + void DumpJit(); + + void CompileDelaySlot(int flags); + void Compile(u32 em_address); // Compiles a block at current MIPS PC + const u8 *DoJit(u32 em_address, JitBlock *b); + + PpcJitOptions jo; + PpcJitState js; + + PpcRegCache gpr; + //PpcRegCacheFPU fpr; + + MIPSState *mips_; + + JitBlockCache *GetBlockCache() { return &blocks; } + + public: + // Code pointers + const u8 *enterCode; + + const u8 *outerLoop; + const u8 *outerLoopPCInR0; + const u8 *dispatcherCheckCoreState; + const u8 *dispatcherPCInR0; + const u8 *dispatcher; + const u8 *dispatcherNoCheck; + + const u8 *breakpointBailout; + + }; + + typedef void (Jit::*MIPSCompileFunc)(u32 opcode); } // namespace MIPSComp diff --git a/Core/MIPS/PPC/PpcRegCache.cpp b/Core/MIPS/PPC/PpcRegCache.cpp new file mode 100644 index 000000000..98bd2521d --- /dev/null +++ b/Core/MIPS/PPC/PpcRegCache.cpp @@ -0,0 +1,317 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ +#include +#include "PpcRegCache.h" +#include "PpcJit.h" + +#if defined(MAEMO) +#include "stddef.h" +#endif + +using namespace PpcGen; + +PpcRegCache::PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options) : mips_(mips), options_(options) { +} + +void PpcRegCache::Init(PPCXEmitter *emitter) { + emit_ = emitter; +} + +void PpcRegCache::Start(MIPSAnalyst::AnalysisResults &stats) { + for (int i = 0; i < NUM_PPCREG; i++) { + ar[i].mipsReg = -1; + ar[i].isDirty = false; + } + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].loc = ML_MEM; + mr[i].reg = INVALID_REG; + mr[i].imm = -1; + mr[i].spillLock = false; + } +} + +const PPCReg *PpcRegCache::GetMIPSAllocationOrder(int &count) { + // Note that R0 is reserved as scratch for now. + // R1 could be used as it's only used for scratch outside "regalloc space" now. + // R12 is also potentially usable. + // R4-R7 are registers we could use for static allocation or downcount. + // R8 is used to preserve flags in nasty branches. + // R9 and upwards are reserved for jit basics. + if (options_->downcountInRegister) { + static const PPCReg allocationOrder[] = { + /*R14, R15, R16, R17, R18, */R19, + R20, R21, R22, R23, R24, R25, + R26, R27, R28, R29, R30, R31, + }; + count = sizeof(allocationOrder) / sizeof(const int); + return allocationOrder; + } else { + static const PPCReg allocationOrder2[] = { + /*R14, R15, R16, R17, R18,*/ R19, + R20, R21, R22, R23, R24, R25, + R26, R27, R28, R29, R30, R31, + }; + count = sizeof(allocationOrder2) / sizeof(const int); + return allocationOrder2; + } +} + +void PpcRegCache::FlushBeforeCall() { + // R4-R11 are preserved. Others need flushing. + /* + FlushPpcReg(R2); + FlushPpcReg(R3); + FlushPpcReg(R12); + */ +} + +// TODO: Somewhat smarter spilling - currently simply spills the first available, should do +// round robin or FIFO or something. +PPCReg PpcRegCache::MapReg(MIPSReg mipsReg, int mapFlags) { + // Let's see if it's already mapped. If so we just need to update the dirty flag. 
+ // We don't need to check for ML_NOINIT because we assume that anyone who maps + // with that flag immediately writes a "known" value to the register. + if (mr[mipsReg].loc == ML_PPCREG) { + if (ar[mr[mipsReg].reg].mipsReg != mipsReg) { + ERROR_LOG(HLE, "Register mapping out of sync! %i", mipsReg); + } + if (mapFlags & MAP_DIRTY) { + ar[mr[mipsReg].reg].isDirty = true; + } + return (PPCReg)mr[mipsReg].reg; + } + + // Okay, not mapped, so we need to allocate an ARM register. + + int allocCount; + const PPCReg *allocOrder = GetMIPSAllocationOrder(allocCount); + +allocate: + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i]; + + if (ar[reg].mipsReg == -1) { + // That means it's free. Grab it, and load the value into it (if requested). + ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false; + if (!(mapFlags & MAP_NOINIT)) { + if (mr[mipsReg].loc == ML_MEM) { + if (mipsReg != 0) { + emit_->LWZ((PPCReg)reg, CTXREG, GetMipsRegOffset(mipsReg)); + } else { + // If we get a request to load the zero register, at least we won't spend + // time on a memory access... + emit_->MOVI2R((PPCReg)reg, 0); + } + } else if (mr[mipsReg].loc == ML_IMM) { + emit_->MOVI2R((PPCReg)reg, mr[mipsReg].imm); + ar[reg].isDirty = true; // IMM is always dirty. + } + } + ar[reg].mipsReg = mipsReg; + mr[mipsReg].loc = ML_PPCREG; + mr[mipsReg].reg = (PPCReg)reg; + return (PPCReg)reg; + } + } + + // Still nothing. Let's spill a reg and goto 10. + // TODO: Use age or something to choose which register to spill? + // TODO: Spill dirty regs first? or opposite? + int bestToSpill = -1; + for (int i = 0; i < allocCount; i++) { + int reg = allocOrder[i]; + if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock) + continue; + bestToSpill = reg; + break; + } + + if (bestToSpill != -1) { + // ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill); + FlushPpcReg((PPCReg)bestToSpill); + goto allocate; + } + + // Uh oh, we have all them spilllocked.... 
+ ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc); + return INVALID_REG; +} + +void PpcRegCache::MapInIn(MIPSReg rd, MIPSReg rs) { + SpillLock(rd, rs); + MapReg(rd); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCache::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) { + SpillLock(rd, rs); + bool load = !avoidLoad || rd == rs; + MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT)); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCache::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) { + SpillLock(rd, rs, rt); + bool load = !avoidLoad || (rd == rs || rd == rt); + MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT)); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCache::MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad) { + SpillLock(rd1, rd2, rs, rt); + bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt); + bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt); + MapReg(rd1, MAP_DIRTY | (load1 ? 0 : MAP_NOINIT)); + MapReg(rd2, MAP_DIRTY | (load2 ? 0 : MAP_NOINIT)); + MapReg(rt); + MapReg(rs); + ReleaseSpillLocks(); +} + +void PpcRegCache::FlushPpcReg(PPCReg r) { + if (ar[r].mipsReg == -1) { + // Nothing to do, reg not mapped. + return; + } + if (ar[r].mipsReg != -1) { + if (ar[r].isDirty && mr[ar[r].mipsReg].loc == ML_PPCREG) + emit_->STW(r, CTXREG, GetMipsRegOffset(ar[r].mipsReg)); + // IMMs won't be in an ARM reg. + mr[ar[r].mipsReg].loc = ML_MEM; + mr[ar[r].mipsReg].reg = INVALID_REG; + mr[ar[r].mipsReg].imm = 0; + } else { + ERROR_LOG(HLE, "Dirty but no mipsreg?"); + } + ar[r].isDirty = false; + ar[r].mipsReg = -1; +} + +void PpcRegCache::FlushR(MIPSReg r) { + switch (mr[r].loc) { + case ML_IMM: + // IMM is always "dirty". 
+ emit_->MOVI2R(SREG, mr[r].imm); + emit_->STW(SREG, CTXREG, GetMipsRegOffset(r)); + break; + + case ML_PPCREG: + if (mr[r].reg == INVALID_REG) { + ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad PpcReg"); + } + if (ar[mr[r].reg].isDirty) { + emit_->STW((PPCReg)mr[r].reg, CTXREG, GetMipsRegOffset(r)); + ar[mr[r].reg].isDirty = false; + } + ar[mr[r].reg].mipsReg = -1; + break; + + case ML_MEM: + // Already there, nothing to do. + break; + + default: + //BAD + break; + } + mr[r].loc = ML_MEM; + mr[r].reg = INVALID_REG; + mr[r].imm = 0; +} + +void PpcRegCache::FlushAll() { + for (int i = 0; i < NUM_MIPSREG; i++) { + FlushR(i); + } + // Sanity check + for (int i = 0; i < NUM_PPCREG; i++) { + if (ar[i].mipsReg != -1) { + ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg); + } + } +} + +void PpcRegCache::SetImm(MIPSReg r, u32 immVal) { + if (r == 0) + ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal); + + // Zap existing value if cached in a reg + if (mr[r].loc == ML_PPCREG) { + ar[mr[r].reg].mipsReg = -1; + ar[mr[r].reg].isDirty = false; + } + mr[r].loc = ML_IMM; + mr[r].imm = immVal; + mr[r].reg = INVALID_REG; +} + +bool PpcRegCache::IsImm(MIPSReg r) const { + if (r == 0) return true; + return mr[r].loc == ML_IMM; +} + +u32 PpcRegCache::GetImm(MIPSReg r) const { + if (r == 0) return 0; + if (mr[r].loc != ML_IMM) { + ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r); + } + return mr[r].imm; +} + +int PpcRegCache::GetMipsRegOffset(MIPSReg r) { + if (r < 32) + return r * 4; + switch (r) { + case MIPSREG_HI: + return offsetof(MIPSState, hi); + case MIPSREG_LO: + return offsetof(MIPSState, lo); + } + ERROR_LOG(JIT, "bad mips register %i", r); + return 0; // or what? 
+} + +void PpcRegCache::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) { + mr[r1].spillLock = true; + if (r2 != -1) mr[r2].spillLock = true; + if (r3 != -1) mr[r3].spillLock = true; + if (r4 != -1) mr[r4].spillLock = true; +} + +void PpcRegCache::ReleaseSpillLocks() { + for (int i = 0; i < NUM_MIPSREG; i++) { + mr[i].spillLock = false; + } +} + +void PpcRegCache::ReleaseSpillLock(MIPSReg reg) { + mr[reg].spillLock = false; +} + +PPCReg PpcRegCache::R(int mipsReg) { + if (mr[mipsReg].loc == ML_PPCREG) { + return (PPCReg)mr[mipsReg].reg; + } else { + ERROR_LOG(JIT, "Reg %i not in ppc reg. compilerPC = %08x", mipsReg, compilerPC_); + return INVALID_REG; // BAAAD + } +} diff --git a/Core/MIPS/PPC/PpcRegCache.h b/Core/MIPS/PPC/PpcRegCache.h new file mode 100644 index 000000000..acd7ebf25 --- /dev/null +++ b/Core/MIPS/PPC/PpcRegCache.h @@ -0,0 +1,156 @@ +// Copyright (c) 2012- PPSSPP Project. + +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, version 2.0 or later versions. + +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License 2.0 for more details. + +// A copy of the GPL 2.0 should have been included with the program. +// If not, see http://www.gnu.org/licenses/ + +// Official git repository and contact information can be found at +// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. 
+ + +/** +PPC reg cache based on arm version +**/ + +#pragma once + +#include "../MIPS.h" +#include "../MIPSAnalyst.h" +#include "ppcEmitter.h" + +using namespace PpcGen; + +// R2 to R8: mapped MIPS regs +// R9 = code pointers +// R10 = MIPS context +// R11 = base pointer + + +// R18 to R31: mapped MIPS regs +// R14 = MIPS context +// R15 = downcount register +// R16 = code pointer +// R17 = base pointer + +#if 1 +#define CTXREG (R14) +#define DCNTREG (R15) +#define CODEREG (R16) +#define BASEREG (R17) +#else +#define CTXREG (R6) +#define DCNTREG (R7) +#define CODEREG (R8) +#define BASEREG (R9) +#endif + + +// Safe to use this as scratch regs ? +#define SREG (R5) +#define FLAGREG (R18) + +// Special MIPS registers: +enum { + MIPSREG_HI = 32, + MIPSREG_LO = 33, + TOTAL_MAPPABLE_MIPSREGS = 34, +}; + +typedef int MIPSReg; + +struct RegPPC { + int mipsReg; // if -1, no mipsreg attached. + bool isDirty; // Should the register be written back? +}; + +enum RegMIPSLoc { + ML_IMM, + ML_PPCREG, + ML_MEM, +}; + +struct RegMIPS { + // Where is this MIPS register? + RegMIPSLoc loc; + // Data (only one of these is used, depending on loc. Could make a union). + u32 imm; + PPCReg reg; // reg index + bool spillLock; // if true, this register cannot be spilled. + // If loc == ML_MEM, it's back in its location in the CPU context struct. +}; + +#undef MAP_DIRTY +#undef MAP_NOINIT +// Initing is the default so the flag is reversed. +enum { + MAP_DIRTY = 1, + MAP_NOINIT = 2, +}; + +namespace MIPSComp { + struct PpcJitOptions; +} + +class PpcRegCache +{ +public: + PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options); + ~PpcRegCache() {} + + void Init(PPCXEmitter *emitter); + void Start(MIPSAnalyst::AnalysisResults &stats); + + // Protect the arm register containing a MIPS register from spilling, to ensure that + // it's being kept allocated. 
+ void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1); + void ReleaseSpillLock(MIPSReg reg); + void ReleaseSpillLocks(); + + void SetImm(MIPSReg reg, u32 immVal); + bool IsImm(MIPSReg reg) const; + u32 GetImm(MIPSReg reg) const; + + // Returns an ARM register containing the requested MIPS register. + PPCReg MapReg(MIPSReg reg, int mapFlags = 0); + void MapInIn(MIPSReg rd, MIPSReg rs); + void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true); + void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true); + void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true); + void FlushPpcReg(PPCReg r); + void FlushR(MIPSReg r); + void FlushBeforeCall(); + void FlushAll(); + + PPCReg R(int preg); // Returns a cached register + + void SetEmitter(PPCXEmitter *emitter) { emit_ = emitter; } + + // For better log output only. + void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; } + + int GetMipsRegOffset(MIPSReg r); + +private: + const PPCReg *GetMIPSAllocationOrder(int &count); + + MIPSState *mips_; + MIPSComp::PpcJitOptions *options_; + PPCXEmitter *emit_; + u32 compilerPC_; + + enum { + NUM_PPCREG = 32, + NUM_MIPSREG = TOTAL_MAPPABLE_MIPSREGS, + }; + + RegPPC ar[NUM_MIPSREG]; + RegMIPS mr[NUM_MIPSREG]; +};