Merge pull request #3191 from Ced2911/ppc_basic

ppc support + ppc jit
This commit is contained in:
Henrik Rydgård 2013-08-16 12:16:49 -07:00
commit a678621caf
21 changed files with 3170 additions and 27 deletions

View File

@ -169,7 +169,7 @@ private:
# elif defined __SSE3__
# define _M_SSE 0x300
# endif
#elif (_MSC_VER >= 1500) || __INTEL_COMPILER // Visual Studio 2008
#elif ((_MSC_VER >= 1500) || __INTEL_COMPILER) // Visual Studio 2008
# define _M_SSE 0x402
#endif
@ -213,6 +213,34 @@ inline double bswapd( double f )
return dat2.f;
}
#ifdef BIG_ENDIAN
template<typename T> T LE_F(T x) {
return (T)bswapf(x);
}
template<typename T> T LE_16(T x) {
return (T)bswap16(x);
}
template<typename T> T LE_32(T x) {
return (T)bswap32(x);
}
template<typename T> T LE_64(T x) {
return (T)bswap64(x);
}
#else
template<typename T> T LE_F(T x) {
return (T)(x);
}
template<typename T> T LE_16(T x) {
return (T)(x);
}
template<typename T> T LE_32(T x) {
return (T)(x);
}
template<typename T> T LE_64(T x) {
return (T)(x);
}
#endif
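// A minimal usage sketch (hypothetical helper): wrap raw loads of the PSP's
// little-endian data in LE_32/LE_16 so that big-endian (PowerPC) builds
// byteswap and little-endian builds pass the value through unchanged.
inline u32 ReadLE32Example(const u32 *p) {
return LE_32(*p);
}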
// Host communication.
enum HOST_COMM
{

View File

@ -85,7 +85,9 @@ public:
return *this;
}
operator long() const { return (long)swap(); }
operator s8() const { return (s8)swap(); }
operator u8() const { return (u8)swap(); }
operator s16() const { return (s16)swap(); }
operator u16() const { return (u16)swap(); }
operator s32() const { return (s32)swap(); }

0
Common/ppcAbi.cpp Normal file
View File

508
Common/ppcEmitter.cpp Normal file
View File

@ -0,0 +1,508 @@
#include <xtl.h>
#include "ppcEmitter.h"
namespace PpcGen {
// Arithmetic ops
void PPCXEmitter::ADD (PPCReg Rd, PPCReg Ra, PPCReg Rb) {
u32 instr = (0x7C000214 | (Rd << 21) | (Ra << 16) | (Rb << 11));
Write32(instr);
}
void PPCXEmitter::ADDI (PPCReg Rd, PPCReg Ra, short imm) {
u32 instr = (0x38000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
Write32(instr);
}
void PPCXEmitter::ADDIS (PPCReg Rd, PPCReg Ra, short imm) {
u32 instr = (0x3C000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
Write32(instr);
}
void PPCXEmitter::AND (PPCReg Rs, PPCReg Ra, PPCReg Rb) {
u32 instr = (0x7C000038 | (Ra << 21) | (Rs << 16) | (Rb << 11));
Write32(instr);
}
void PPCXEmitter::ANDI (PPCReg Rd, PPCReg Ra, unsigned short imm) {
u32 instr = (0x70000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
Write32(instr);
}
void PPCXEmitter::ANDIS (PPCReg Rd, PPCReg Ra, unsigned short imm) {
u32 instr = (0x74000000 | (Rd << 21) | (Ra << 16) | ((imm) & 0xffff));
Write32(instr);
}
// Memory load/store operations
void PPCXEmitter::LI(PPCReg dest, unsigned short imm) {
u32 instr = (0x38000000 | (dest << 21) | ((imm) & 0xffff));
Write32(instr);
}
void PPCXEmitter::LIS(PPCReg dest, unsigned short imm) {
u32 instr = (0x3C000000 | (dest << 21) | ((imm) & 0xffff));
Write32(instr);
}
void PPCXEmitter::LBZ (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0x88000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::LBZX (PPCReg dest, PPCReg a, PPCReg b) {
u32 instr = ((31<<26) | (dest << 21) | (a << 16) | (b << 11) | (87<<1));
Write32(instr);
}
void PPCXEmitter::LHZ (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0xA0000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::LHBRX (PPCReg dest, PPCReg src, PPCReg offset) {
u32 instr = (0x7C00062C | (dest << 21) | (src << 16) | (offset << 11));
Write32(instr);
}
void PPCXEmitter::LWZ (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0x80000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::LWBRX (PPCReg dest, PPCReg src, PPCReg offset) {
u32 instr = (0x7C00042C | (dest << 21) | (src << 16) | (offset << 11));
Write32(instr);
}
void PPCXEmitter::STB (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0x98000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::STBX (PPCReg dest, PPCReg a, PPCReg b) {
u32 instr = ((31<<26) | (dest << 21) | (a << 16) | (b << 11) | (215 << 1));
Write32(instr);
}
void PPCXEmitter::STH (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0xB0000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::STHBRX (PPCReg dest, PPCReg src, PPCReg offset) {
u32 instr = (0x7C00072C | (dest << 21) | (src << 16) | (offset << 11));
Write32(instr);
}
void PPCXEmitter::STW (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0x90000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::STWU (PPCReg dest, PPCReg src, int offset) {
u32 instr = (0x94000000 | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::STWBRX (PPCReg dest, PPCReg src, PPCReg offset) {
u32 instr = (0x7C00052C | (dest << 21) | (src << 16) | (offset << 11));
Write32(instr);
}
void PPCXEmitter::LD (PPCReg dest, PPCReg src, int offset) {
u32 instr = ((58 << 26) | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
void PPCXEmitter::STD (PPCReg dest, PPCReg src, int offset) {
u32 instr = ((62 << 26) | (dest << 21) | (src << 16) | ((offset) & 0xffff));
Write32(instr);
}
// Branch operations
void PPCXEmitter::B (const void *fnptr) {
s32 func = (s32)fnptr - s32(code);
u32 instr = (0x48000000 | ((s32)((func) & 0x3fffffc)));
Write32(instr);
}
void PPCXEmitter::BL(const void *fnptr) {
s32 func = (s32)fnptr - s32(code);
u32 instr = (0x48000001 | ((s32)((func) & 0x3fffffc)));
Write32(instr);
}
void PPCXEmitter::BA (const void *fnptr) {
s32 func = (s32)fnptr;
u32 instr = (0x48000002 | ((s32)((func) & 0x3fffffc)));
Write32(instr);
}
void PPCXEmitter::BLA (const void *fnptr) {
s32 func = (s32)fnptr;
u32 instr = (0x48000003 | ((s32)((func) & 0x3fffffc)));
Write32(instr);
}
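// b/bl/ba/bla use the I-form encoding: a signed, word-aligned 26-bit field,
// which is what the 0x3fffffc mask keeps and why the reach is about +/-32 MB
// (relative for b/bl, absolute for ba/bla). A small decoding sketch
// (hypothetical helper) that recovers the field from an emitted instruction:
static s32 DecodeIFormField(u32 instr) {
s32 li = (s32)(instr & 0x03FFFFFC); // bits 6..29 of the opcode, low two bits zero
if (li & 0x02000000) // sign bit of the 26-bit field
li -= 0x04000000; // sign-extend
return li; // for b/bl: target = address of the branch + li
}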
// True when the displacement fits in the 16-bit field of a conditional branch.
#define IS_SMALL_JUMP ((((s32)code - (s32)fnptr) >= -32767) && (((s32)code - (s32)fnptr) <= 32767))
#define CHECK_SMALL_JUMP { if(!IS_SMALL_JUMP) { DebugBreak(); } }
void PPCXEmitter::BEQ (const void *fnptr) {
CHECK_SMALL_JUMP
s32 func = (s32)fnptr - s32(code);
u32 instr = (0x41820000 | ( func & 0xfffc));
Write32(instr);
}
void PPCXEmitter::BGT(const void *fnptr) {
CHECK_SMALL_JUMP
s32 func = (s32)fnptr - s32(code);
u32 instr = (0x41810000 | (((s16)(((func)+1))) & 0xfffc));
Write32(instr);
}
void PPCXEmitter::BLTCTR() {
Write32((19 << 26) | (12 << 21) | (528 <<1));
// Break();
}
void PPCXEmitter::BLT (const void *fnptr) {
//CHECK_JUMP
if (!IS_SMALL_JUMP) {
u32 func_addr = (u32) fnptr;
// Load func address
MOVI2R(R0, func_addr);
// Set it to link register
MTCTR(R0);
// Branch
BLTCTR();
return;
}
s32 func = (s32)fnptr - s32(code);
u32 instr = (0x41800000 | (((s16)(((func)+1))) & 0xfffc));
Write32(instr);
}
void PPCXEmitter::BLE (const void *fnptr) {
CHECK_SMALL_JUMP
s32 func = (s32)fnptr - s32(code);
u32 instr = (0x40810000 | (((s16)(((func)+1))) & 0xfffc));
Write32(instr);
}
void PPCXEmitter::BCTRL() {
Write32(0x4E800421);
}
void PPCXEmitter::BCTR() {
Write32(0x4E800420);
}
// Link Register
void PPCXEmitter::MFLR(PPCReg r) {
Write32(0x7C0802A6 | r << 21);
}
void PPCXEmitter::MTLR(PPCReg r) {
Write32(0x7C0803A6 | r << 21);
}
void PPCXEmitter::MTCTR(PPCReg r) {
Write32(0x7C0903A6 | r << 21);
}
void PPCXEmitter::BLR() {
Write32(0x4E800020);
}
void PPCXEmitter::BGTLR() {
Write32(0x4D810020);
}
// Fixup
FixupBranch PPCXEmitter::B()
{
FixupBranch branch;
branch.type = _B;
branch.ptr = code;
branch.condition = condition;
//We'll write NOP here for now.
Write32(0x60000000);
return branch;
}
FixupBranch PPCXEmitter::BL()
{
FixupBranch branch;
branch.type = _BL;
branch.ptr = code;
branch.condition = condition;
//We'll write NOP here for now.
Write32(0x60000000);
return branch;
}
FixupBranch PPCXEmitter::BNE() {
FixupBranch branch;
branch.type = _BNE;
branch.ptr = code;
branch.condition = condition;
//We'll write NOP here for now.
Write32(0x60000000);
return branch;
}
FixupBranch PPCXEmitter::BLT() {
FixupBranch branch;
branch.type = _BLT;
branch.ptr = code;
branch.condition = condition;
//We'll write NOP here for now.
Write32(0x60000000);
return branch;
}
FixupBranch PPCXEmitter::BLE() {
FixupBranch branch;
branch.type = _BLE;
branch.ptr = code;
branch.condition = condition;
//We'll write NOP here for now.
Write32(0x60000000);
return branch;
}
FixupBranch PPCXEmitter::B_Cond(FixupBranchType type) {
FixupBranch branch;
branch.type = type;
branch.ptr = code;
branch.condition = condition;
//We'll write NOP here for now.
Write32(0x60000000);
return branch;
}
void PPCXEmitter::SetJumpTarget(FixupBranch const &branch)
{
s32 distance = s32(code) - (s32)branch.ptr;
_assert_msg_(DYNA_REC, distance > -32767
&& distance <= 32767,
"SetJumpTarget out of range (%p calls %p)", code,
branch.ptr);
switch(branch.type) {
case _B:
*(u32*)branch.ptr = (0x48000000 | ((s32)((distance) & 0x3fffffc)));
break;
case _BL:
*(u32*)branch.ptr = (0x48000001 | ((s32)((distance) & 0x3fffffc)));
break;
case _BEQ:
*(u32*)branch.ptr = (0x41820000 | ((s16)(((distance)+1)) & 0xfffc));
break;
case _BNE:
*(u32*)branch.ptr = (0x40820000 | ((s16)(((distance)+1)) & 0xfffc));
break;
case _BLT:
*(u32*)branch.ptr = (0x41800000 | ((s16)(((distance)+1)) & 0xfffc));
break;
case _BLE:
*(u32*)branch.ptr = (0x40810000 | ((s16)(((distance)+1)) & 0xfffc));
break;
case _BGT:
*(u32*)branch.ptr = (0x41810000 | ((s16)(((distance)+1)) & 0xfffc));
break;
case _BGE:
*(u32*)branch.ptr = (0x40800000 | ((s16)(((distance)+1)) & 0xfffc));
break;
default:
// Error !!!
_assert_msg_(DYNA_REC, 0, "SetJumpTarget unknown branch type: %d", branch.type);
break;
}
}
// Compare (Only use CR0 atm...)
void PPCXEmitter::CMPI(PPCReg dest, unsigned short imm) {
Write32((11<<26) | (dest << 16) | ((imm) & 0xffff));
}
void PPCXEmitter::CMPLI(PPCReg dest, unsigned short imm) {
Write32((10<<26) | (dest << 16) | ((imm) & 0xffff));
}
void PPCXEmitter::CMP(PPCReg a, PPCReg b) {
Write32((31 << 26) | (a << 16) | (b << 11));
}
void PPCXEmitter::CMPL(PPCReg a, PPCReg b) {
Write32((31 << 26) | (a << 16) | (b << 11) | (1<<6));
}
// Other operations
void PPCXEmitter::ORI(PPCReg src, PPCReg dest, unsigned short imm) {
u32 instr = (0x60000000 | (src << 21) | (dest << 16) | (imm & 0xffff));
Write32(instr);
}
void PPCXEmitter::OR(PPCReg Rd, PPCReg Ra, PPCReg Rb) {
u32 instr = (0x7C000378 | (Ra << 21) | (Rd << 16) | (Rb << 11));
Write32(instr);
}
void PPCXEmitter::XOR(PPCReg Rd, PPCReg Ra, PPCReg Rb) {
u32 instr = (0x7C000278 | (Ra << 21) | (Rd << 16) | (Rb << 11));
Write32(instr);
}
void PPCXEmitter::SUBF(PPCReg Rd, PPCReg Ra, PPCReg Rb, int RCFlags) {
u32 instr = (0x7C000050 | (Rd << 21) | (Ra << 16) | (Rb << 11) | (RCFlags & 1));
Write32(instr);
}
// Quick Call
// dest = LIS(imm) + ORI(+imm)
void PPCXEmitter::MOVI2R(PPCReg dest, unsigned int imm) {
if (imm == (unsigned short)imm) {
// 16bit
LI(dest, imm & 0xFFFF);
} else {
// HI 16bit
LIS(dest, imm>>16);
if ((imm & 0xFFFF) != 0) {
// LO 16bit
ORI(dest, dest, imm & 0xFFFF);
}
}
}
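// Worked example (hypothetical register r5):
//   MOVI2R(R5, 0x08804000) emits  lis r5, 0x0880 ; ori r5, r5, 0x4000
//   MOVI2R(R5, 0x00001234) emits  li  r5, 0x1234  (16-bit path)
// Note that LI is an addi from r0 and sign-extends its immediate, so the
// 16-bit path only gives the expected result for values below 0x8000.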
void PPCXEmitter::QuickCallFunction(void *func) {
/** TODO : can use simple jump **/
u32 func_addr = (u32) func;
// Load func address
MOVI2R(R0, func_addr);
// Set it to link register
MTCTR(R0);
// Branch
BCTRL();
}
// sign
void PPCXEmitter::EXTSB (PPCReg dest, PPCReg src) {
Write32((0x7C000774 | (src << 21) | (dest << 16)));
}
void PPCXEmitter::EXTSH (PPCReg dest, PPCReg src) {
Write32(0x7C000734 | (src << 21) | (dest << 16));
}
void PPCXEmitter::RLWINM (PPCReg dest, PPCReg src, int shift, int start, int end) {
Write32((21<<26) | (src << 21) | (dest << 16) | (shift << 11) | (start << 6) | (end << 1));
}
// Prologue / epilogue
void PPCXEmitter::Prologue() {
// Save regs
u32 regSize = 8; // 4 in 32bit system
u32 stackFrameSize = 32*32;//(35 - 12) * regSize;
// Write Prologue (setup stack frame etc ...)
// Save Lr
MFLR(R12);
for(int i = 14; i < 32; i ++) {
STD((PPCReg)i, R1, -((33 - i) * regSize));
}
// Save r12
STW(R12, R1, -0x8);
// allocate stack
STWU(R1, R1, -stackFrameSize);
}
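// Sketch of the frame Prologue builds (offsets relative to the caller's r1,
// assuming the 8-byte GPR saves used above):
//   r1 - 0x08          : saved r12 (copy of the caller's LR)
//   r1 - (33 - i) * 8  : saved r14..r31, for i = 14..31
//   stwu r1, -0x400(r1): allocates the 32*32-byte frame and stores the back link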
void PPCXEmitter::Epilogue() {
u32 regSize = 8; // 4 in 32bit system
u32 stackFrameSize = 32*32;//(35 - 12) * regSize;
// Write Epilogue (restore stack frame, return)
// free stack
ADDI(R1, R1, stackFrameSize);
// Restore regs
for(int i = 14; i < 32; i ++) {
LD((PPCReg)i, R1, -((33 - i) * regSize));
}
// recover r12 (LR saved register)
LWZ (R12, R1, -0x8);
// Restore Lr
MTLR(R12);
}
// Others ...
void PPCXEmitter::SetCodePtr(u8 *ptr)
{
code = ptr;
startcode = code;
lastCacheFlushEnd = ptr;
}
const u8 *PPCXEmitter::GetCodePtr() const
{
return code;
}
u8 *PPCXEmitter::GetWritableCodePtr()
{
return code;
}
void PPCXEmitter::ReserveCodeSpace(u32 bytes)
{
for (u32 i = 0; i < bytes/4; i++)
Write32(0x60000000); //nop
}
const u8 *PPCXEmitter::AlignCode16()
{
ReserveCodeSpace((-(s32)code) & 15);
return code;
}
const u8 *PPCXEmitter::AlignCodePage()
{
ReserveCodeSpace((-(s32)code) & 4095);
return code;
}
void PPCXEmitter::FlushIcache()
{
FlushIcacheSection(lastCacheFlushEnd, code);
lastCacheFlushEnd = code;
}
void PPCXEmitter::FlushIcacheSection(u8 *start, u8 *end)
{
u8 * addr = start;
while(addr < end) {
__asm dcbst r0, addr
__asm icbi r0, addr
addr += 4;
}
__emit(0x7c0004ac);//sync
__emit(0x4C00012C);//isync
}
} // namespace

381
Common/ppcEmitter.h Normal file
View File

@ -0,0 +1,381 @@
// Copyright (C) 2003 Dolphin Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/
// WARNING - THIS LIBRARY IS NOT THREAD SAFE!!!
// http://www.csd.uwo.ca/~mburrel/stuff/ppc-asm.html
// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.aixassem/doc/alangref/linkage_convent.htm
// http://publib.boulder.ibm.com/infocenter/pseries/v5r3/index.jsp?topic=/com.ibm.aix.aixassem/doc/alangref/instruction_set.htm
#ifndef _DOLPHIN_PPC_CODEGEN_
#define _DOLPHIN_PPC_CODEGEN_
#include "Common.h"
#include "MemoryUtil.h"
#include <vector>
#undef _IP
#undef R0
#undef _SP
#undef _LR
#undef _PC
#undef CALL
namespace PpcGen
{
enum PPCReg
{
// GPRs (32)
// Behaves as zero does in some instructions
R0 = 0,
// Stack pointer (SP)
R1,
// Reserved
R2,
// Used to pass integer function parameters and return values
R3, R4,
// Used to pass integer function parameters
R5, R6, R7, R8, R9, R10,
// General purpose
R11,
// Scratch
R12,
// Reserved, not used by the compiler
R13,
// General purpose
R14, R15, R16, R17, R18, R19,
R20, R21, R22, R23, R24, R25,
R26, R27, R28, R29, R30, R31,
// CRs (7)
CR0 = 0,
// FPRs (32)
// Scratch
FPR0,
// Used to pass floating-point function parameters and return values
FPR1, FPR2, FPR3, FPR4,
FPR5, FPR6, FPR7, FPR8,
FPR9, FPR10, FPR11, FPR12,
FPR13,
// General purpose
FPR14, FPR15, FPR16, FPR17,
FPR18, FPR19, FPR20, FPR21,
FPR22, FPR23, FPR24, FPR25,
FPR26, FPR27, FPR28, FPR29,
FPR30, FPR31,
// Vmx (128)
VR0,
// Used to pass vector function parameters and return values
VR1, VR2, VR3, VR4,
VR5, VR6, VR7, VR8,
VR9, VR10, VR11, VR12,
VR13, // ...
// Other regs
LR, CTR, XER, FPSCR,
// End
INVALID_REG = 0xFFFFFFFF
};
enum IntegerSize
{
I_I8 = 0,
I_I16,
I_I32,
I_I64
};
enum
{
NUMGPRs = 31,
};
typedef const u8* JumpTarget;
enum FixupBranchType {
_B,
_BEQ,
_BNE,
_BLT,
_BLE,
_BGT,
_BGE,
// Link register
_BL
};
struct FixupBranch
{
u8 *ptr;
u32 condition; // Remembers our condition at the time
FixupBranchType type; //0 = B 1 = BL
};
class PPCXEmitter
{
private:
u8 *code, *startcode;
u8 *lastCacheFlushEnd;
u32 condition;
protected:
// Write opcode
inline void Write32(u32 value) {*(u32*)code = value; code+=4;}
public:
PPCXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
}
PPCXEmitter(u8 *code_ptr) {
code = code_ptr;
lastCacheFlushEnd = code_ptr;
startcode = code_ptr;
}
virtual ~PPCXEmitter() {}
void SetCodePtr(u8 *ptr);
void ReserveCodeSpace(u32 bytes);
const u8 *AlignCode16();
const u8 *AlignCodePage();
const u8 *GetCodePtr() const;
void FlushIcache();
void FlushIcacheSection(u8 *start, u8 *end);
u8 *GetWritableCodePtr();
// Special purpose instructions
// Debug Breakpoint
void BKPT(u16 arg);
// Hint instruction
void YIELD();
// Do nothing
void NOP(int count = 1); //nop padding - TODO: fast nop slides, for amd and intel (check their manuals)
// FixupBranch ops
FixupBranch B();
FixupBranch BL();
FixupBranch BNE();
FixupBranch BLT();
FixupBranch BLE();
FixupBranch B_Cond(FixupBranchType type);
void SetJumpTarget(FixupBranch const &branch);
// Branch ops
void B (const void *fnptr);
void BL(const void *fnptr);
void BA (const void *fnptr);
void BLA(const void *fnptr);
void BEQ(const void *fnptr);
void BLE(const void *fnptr);
void BLT(const void *fnptr);
void BGT(const void *fnptr);
void BEQ (PPCReg r);
void BLR();
void BGTLR(); // ??? used ?
void BLTCTR();
void BGTCTR();
void BLECTR();
void BGECTR();
void BCTRL ();
void BCTR();
// Link Register
void MFLR(PPCReg r);
void MTLR(PPCReg r);
void MTCTR(PPCReg r);
// Logical Ops
void AND (PPCReg Rs, PPCReg Ra, PPCReg Rb);
void ANDI (PPCReg Rs, PPCReg Ra, unsigned short imm);
void ANDIS(PPCReg Rs, PPCReg Ra, unsigned short imm);
void NAND (PPCReg Rs, PPCReg Ra, PPCReg Rb);
void OR (PPCReg Rs, PPCReg Ra, PPCReg Rb);
void ORC (PPCReg Rs, PPCReg Ra, PPCReg Rb);
void NOR (PPCReg Rs, PPCReg Ra, PPCReg Rb);
void XOR (PPCReg Rs, PPCReg Ra, PPCReg Rb);
void NEG (PPCReg Rs, PPCReg Ra, PPCReg Rb);
// Arithmetic ops
void ADD (PPCReg Rd, PPCReg Ra, PPCReg Rb);
void ADDI (PPCReg Rd, PPCReg Ra, short imm);
void ADDIS (PPCReg Rd, PPCReg Ra, short imm);
void ADDC (PPCReg Rd, PPCReg Ra, PPCReg Rb);
void SUB (PPCReg Rd, PPCReg Ra, PPCReg Rb) {
// subf computes (Rb - Ra), so the operands are swapped to get Rd = Ra - Rb
SUBF(Rd, Rb, Ra);
}
// if RCFlags update CR0
void SUBF (PPCReg Rd, PPCReg Ra, PPCReg Rb, int RCFlags = 0);
void SUBFC (PPCReg Rd, PPCReg Ra, PPCReg Rb);
// Floating ops
void DIVW(PPCReg dest, PPCReg dividend, PPCReg divisor);
void DIVWU(PPCReg dest, PPCReg dividend, PPCReg divisor);
void MULLW(PPCReg dest, PPCReg src, PPCReg op2);
void MULHW (PPCReg dest, PPCReg src, PPCReg op2);
void MULHWS(PPCReg dest, PPCReg src, PPCReg op2);
void ORI (PPCReg src, PPCReg dest, unsigned short imm);
// Memory load/store operations
void LI (PPCReg dest, unsigned short imm);
void LIS (PPCReg dest, unsigned short imm);
// dest = LIS(imm) + ORI(+imm)
void MOVI2R (PPCReg dest, unsigned int imm);
// 8bit
void LBZ (PPCReg dest, PPCReg src, int offset = 0);
void LBZX (PPCReg dest, PPCReg a, PPCReg b);
// 16bit
void LHZ (PPCReg dest, PPCReg src, int offset = 0);
void LHBRX (PPCReg dest, PPCReg src, PPCReg offset);
// 32 bit
void LWZ (PPCReg dest, PPCReg src, int offset = 0);
void LWBRX (PPCReg dest, PPCReg src, PPCReg offset);
// 64 bit
void LD (PPCReg dest, PPCReg src, int offset = 0);
// 8 bit
void STB (PPCReg dest, PPCReg src, int offset = 0);
void STBX (PPCReg dest, PPCReg a, PPCReg b);
// 16 bit
void STH (PPCReg dest, PPCReg src, int offset = 0);
void STHBRX (PPCReg dest, PPCReg src, PPCReg offset);
// 32 bit
void STW (PPCReg dest, PPCReg src, int offset = 0);
void STWU (PPCReg dest, PPCReg src, int offset = 0);
void STWBRX (PPCReg dest, PPCReg src, PPCReg offset);
// 64 bit
void STD (PPCReg dest, PPCReg src, int offset = 0);
// sign
void EXTSB (PPCReg dest, PPCReg src);
void EXTSH (PPCReg dest, PPCReg src);
void RLWINM (PPCReg dest, PPCReg src, int shift, int start, int end);
// Compare
void CMPLI (PPCReg dest, unsigned short imm);
void CMPI (PPCReg dest, unsigned short imm);
void CMPL (PPCReg a, PPCReg b);
void CMP (PPCReg a, PPCReg b);
void Prologue();
void Epilogue();
// Debug !
void Break() {
Write32(0x0FE00016);
}
void MR (PPCReg to, PPCReg from) {
OR(to, from, from);
}
void QuickCallFunction(void *func);
protected:
}; // class PPCXEmitter
// You get memory management for free, plus, you can use all the MOV etc functions without
// having to prefix them with gen-> or something similar.
class PPCXCodeBlock : public PPCXEmitter
{
protected:
u8 *region;
size_t region_size;
public:
PPCXCodeBlock() : region(NULL), region_size(0) {}
virtual ~PPCXCodeBlock() { if (region) FreeCodeSpace(); }
// Call this before you generate any code.
void AllocCodeSpace(int size)
{
region_size = size;
region = (u8*)AllocateExecutableMemory(region_size);
SetCodePtr(region);
}
// Always clear code space with breakpoints, so that if someone accidentally executes
// uninitialized, it just breaks into the debugger.
void ClearCodeSpace()
{
// x86/64: 0xCC = breakpoint
memset(region, 0xCC, region_size);
ResetCodePtr();
}
// Call this when shutting down. Don't rely on the destructor, even though it'll do the job.
void FreeCodeSpace()
{
FreeMemoryPages(region, region_size); // release the executable pages allocated above
region = NULL;
region_size = 0;
}
bool IsInSpace(u8 *ptr)
{
return ptr >= region && ptr < region + region_size;
}
// Cannot currently be undone. Will write protect the entire code region.
// Start over if you need to change the code (call FreeCodeSpace(), AllocCodeSpace()).
void WriteProtect()
{
//WriteProtectMemory(region, region_size, true);
}
void UnWriteProtect()
{
//UnWriteProtectMemory(region, region_size, false);
}
void ResetCodePtr()
{
SetCodePtr(region);
}
size_t GetSpaceLeft() const
{
return region_size - (GetCodePtr() - region);
}
u8 *GetBasePtr() {
return region;
}
size_t GetOffset(u8 *ptr) {
return ptr - region;
}
};
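// A minimal usage sketch (hypothetical sizes; r3 carries the first integer
// argument and the return value, per the register list above):
//   PPCXCodeBlock block;
//   block.AllocCodeSpace(4096);
//   const u8 *fn = block.GetCodePtr();
//   block.ADDI(R3, R3, 1);            // r3 = arg + 1
//   block.BLR();                      // return
//   block.FlushIcache();              // make the emitted code visible
//   int fortyTwo = ((int (*)(int))fn)(41);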
} // namespace
#endif // _DOLPHIN_PPC_CODEGEN_

View File

@ -29,6 +29,7 @@ enum GPUCore {
GPU_NULL,
GPU_GLES,
GPU_SOFTWARE,
GPU_DIRECTX9,
};
struct CoreParameter

View File

@ -48,6 +48,10 @@ using namespace ArmGen;
#include "Common/x64Analyzer.h"
#include "Core/MIPS/x86/Asm.h"
using namespace Gen;
#elif defined(PPC)
#include "Common/ppcEmitter.h"
#include "Core/MIPS/MIPS.h"
using namespace PpcGen;
#else
#error "Unsupported arch!"
#endif
@ -274,6 +278,10 @@ void JitBlockCache::LinkBlockExits(int i)
#elif defined(_M_IX86) || defined(_M_X64)
XEmitter emit(b.exitPtrs[e]);
emit.JMP(blocks[destinationBlock].checkedEntry, true);
#elif defined(PPC)
PPCXEmitter emit(b.exitPtrs[e]);
emit.B(blocks[destinationBlock].checkedEntry);
emit.FlushIcache();
#endif
b.linkStatus[e] = true;
}
@ -356,6 +364,12 @@ void JitBlockCache::DestroyBlock(int block_num, bool invalidate)
XEmitter emit((u8 *)b.checkedEntry);
emit.MOV(32, M(&mips->pc), Imm32(b.originalAddress));
emit.JMP(MIPSComp::jit->Asm().dispatcher, true);
#elif defined(PPC)
PPCXEmitter emit((u8 *)b.checkedEntry);
emit.MOVI2R(R3, b.originalAddress);
emit.STW(R3, CTXREG, offsetof(MIPSState, pc));
emit.B(MIPSComp::jit->dispatcher);
emit.FlushIcache();
#endif
}

View File

@ -36,6 +36,11 @@ typedef ArmGen::ARMXCodeBlock CodeBlock;
namespace Gen { class XEmitter; }
using namespace Gen;
typedef Gen::XCodeBlock CodeBlock;
#elif defined(PPC)
#include "Common/ppcEmitter.h"
namespace PpcGen { class PPCXEmitter; }
using namespace PpcGen;
typedef PpcGen::PPCXCodeBlock CodeBlock;
#else
#error "Unsupported arch!"
#endif

View File

@ -19,7 +19,9 @@
#include "Common/Common.h"
#if defined(ARM)
#if defined(PPC)
#include "../PPC/PpcJit.h"
#elif defined(ARM)
#include "../ARM/ArmJit.h"
#else
#include "../x86/Jit.h"

285
Core/MIPS/PPC/PpcAsm.cpp Normal file
View File

@ -0,0 +1,285 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
static void JitAt()
{
MIPSComp::jit->Compile(currentMIPS->pc);
}
namespace MIPSComp
{
static int dontLogBlocks = 20;
static int logBlocks = 40;
const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.downcountAmount = 0;
js.curBlock = b;
js.compiling = true;
js.inDelaySlot = false;
js.PrefixStart();
// We add a check before the block, used when entering from a linked block.
b->checkedEntry = GetCodePtr();
// Downcount flag check. The last block decremented the downcounter, and the flag should still be available.
MOVI2R(SREG, js.blockStart);
// if (currentMIPS->downcount<0)
CMPI(DCNTREG, 0);
BLT((const void *)outerLoopPCInR0);
b->normalEntry = GetCodePtr();
// TODO: this needs work
MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);
gpr.Start(analysis);
//fpr.Start(analysis);
int numInstructions = 0;
int cycles = 0;
int partialFlushOffset = 0;
if (logBlocks > 0) logBlocks--;
if (dontLogBlocks > 0) dontLogBlocks--;
// #define LOGASM
#ifdef LOGASM
char temp[256];
#endif
while (js.compiling)
{
gpr.SetCompilerPC(js.compilerPC); // Let it know for log messages
//fpr.SetCompilerPC(js.compilerPC);
u32 inst = Memory::Read_Instruction(js.compilerPC);
js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);
MIPSCompileOp(inst);
js.compilerPC += 4;
numInstructions++;
}
//FlushLitPool();
#ifdef LOGASM
if (logBlocks > 0 && dontLogBlocks == 0) {
for (u32 cpc = em_address; cpc != js.compilerPC + 4; cpc += 4) {
MIPSDisAsm(Memory::Read_Instruction(cpc), cpc, temp, true);
INFO_LOG(DYNA_REC, "M: %08x %s", cpc, temp);
}
}
#endif
b->codeSize = GetCodePtr() - b->normalEntry;
#ifdef LOGASM
if (logBlocks > 0 && dontLogBlocks == 0) {
INFO_LOG(DYNA_REC, "=============== ARM ===============");
DisassembleArm(b->normalEntry, GetCodePtr() - b->normalEntry);
}
#endif
//DumpJit();
AlignCode16();
// Don't forget to zap the instruction cache!
FlushIcache();
b->originalSize = numInstructions;
return b->normalEntry;
}
void Jit::DumpJit() {
#ifdef _XBOX
u32 len = (u32)GetCodePtr() - (u32)GetBasePtr();
FILE * fd;
fd = fopen("game:\\jit.bin", "wb");
fwrite(GetBasePtr(), len, 1, fd);
fclose(fd);
#endif
}
void Jit::GenerateFixedCode() {
enterCode = AlignCode16();
INFO_LOG(HLE, "Base: %08x", (u32)Memory::base);
INFO_LOG(HLE, "enterCode: 0x%08p", enterCode);
INFO_LOG(HLE, "GetBasePtr: 0x%08p", GetBasePtr());
Prologue();
// Map fixed register
MOVI2R(BASEREG, (u32)Memory::base);
MOVI2R(CTXREG, (u32)mips_);
MOVI2R(CODEREG, (u32)GetBasePtr());
// Update downcount reg value from memory
RestoreDowncount(DCNTREG);
// SREG = mips->pc
MovFromPC(SREG);
// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInR3 ??
outerLoopPCInR0 = GetCodePtr();
// mips->pc = SREG
MovToPC(SREG);
// Keep current location
outerLoop = GetCodePtr();
// Jit loop
// {
// Save downcount reg value to memory
SaveDowncount(DCNTREG);
// Call CoreTiming::Advance() => update downcount
QuickCallFunction((void *)&CoreTiming::Advance);
// Update downcount reg value from memory
RestoreDowncount(DCNTREG);
// branch to skipToRealDispatch
FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
// Keep current location dispatcherCheckCoreState:
dispatcherCheckCoreState = GetCodePtr();
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
// branch to bailCoreState: (jump if(what ??) negative )
FixupBranch bailCoreState = BLT(); // BLT ???
// SREG = coreState
MOVI2R(SREG, (u32)&coreState);
// Compare coreState and CORE_RUNNING
LWZ(SREG, SREG); // SREG = *SREG
CMPI(SREG, 0); // compare 0(CORE_RUNNING) and CR0
// branch to badCoreState: (jump if coreState != CORE_RUNNING)
FixupBranch badCoreState = BNE();
// branch to skipToRealDispatch2:
FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time
// Keep current location, TODO rename it, outerLoopPCInR0 to outerLoopPCInSREG ??
dispatcherPCInR0 = GetCodePtr();
// mips->pc = SREG
MovToPC(SREG);
// At this point : flags = EQ. Fine for the next check, no need to jump over it.
// label dispatcher:
dispatcher = GetCodePtr();
// {
// The result of slice decrementation should be in flags if somebody jumped here
// IMPORTANT - We jump on negative, not carry!!!
// label bail:
// arm B_CC(CC_MI);
FixupBranch bail = BLT();
// label skipToRealDispatch:
SetJumpTarget(skipToRealDispatch);
// label skipToRealDispatch2:
SetJumpTarget(skipToRealDispatch2);
// Keep current location
dispatcherNoCheck = GetCodePtr();
// read op
// R3 = mips->pc & Memory::MEMVIEW32_MASK
LWZ(R3, CTXREG, offsetof(MIPSState, pc));
// & Memory::MEMVIEW32_MASK
RLWINM(R3, R3, 0, 2, 31);
// R3 = memory::base[r3];
ADD(R3, BASEREG, R3);
MOVI2R(R0, 0);
LWBRX(R3, R3, R0);
// R4 = R3 & MIPS_EMUHACK_VALUE_MASK
RLWINM(R4, R3, 0, 6, 31);
// R3 = R3 & MIPS_EMUHACK_MASK
RLWINM(R3, R3, 0, 0, 6);
// compare, op == MIPS_EMUHACK_OPCODE
MOVI2R(SREG, MIPS_EMUHACK_OPCODE);
CMPL(R3, SREG);
// Branch if func block not found
FixupBranch notfound = BNE();
// {
// R3 = R4 + GetBasePtr()
ADD(R3, R4, CODEREG);
MTCTR(R3);
BCTR();
// }
// label notfound:
SetJumpTarget(notfound);
//Ok, no block, let's jit
// Save downcount reg value to memory
SaveDowncount(DCNTREG);
// Exec JitAt => Compile block !
QuickCallFunction((void *)&JitAt);
// Update downcount reg value from memory
RestoreDowncount(DCNTREG);
// branch to dispatcherNoCheck:
B(dispatcherNoCheck); // no point in special casing this
// }
// label bail:
SetJumpTarget(bail);
// label bailCoreState:
SetJumpTarget(bailCoreState);
// Compare coreState and CORE_RUNNING
MOVI2R(SREG, (u32)&coreState);
LWZ(SREG, SREG); // SREG = *SREG => SREG = coreState
CMPLI(SREG, 0); // compare 0(CORE_RUNNING) and corestate
BEQ(outerLoop);
// }
// badCoreState label:
SetJumpTarget(badCoreState);
// Keep current location
breakpointBailout = GetCodePtr();
// mips->downcount = DCNTREG
SaveDowncount(DCNTREG);
Epilogue();
// Go back to caller
BLR();
// Don't forget to zap the instruction cache!
FlushIcache();
}
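// Rough C-level sketch of the generated dispatcher (names follow the labels
// above; blocks themselves decrement the downcount and branch back to the
// bail labels when it goes negative):
//   outerLoop:
//     CoreTiming::Advance();                    // refill the downcount
//   dispatcherNoCheck:
//     u32 op = Memory::Read_Instruction(mips->pc);            // via lwbrx
//     if ((op & MIPS_EMUHACK_MASK) == MIPS_EMUHACK_OPCODE)
//       jump to GetBasePtr() + (op & MIPS_EMUHACK_VALUE_MASK); // run the block
//     else { JitAt(); goto dispatcherNoCheck; }                // compile, retry
//   bail / bailCoreState:
//     if (coreState == CORE_RUNNING) goto outerLoop;
//   badCoreState:
//     Epilogue(); return;                       // BLR back to RunLoopUntil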
}

View File

@ -0,0 +1,172 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
using namespace MIPSAnalyst;
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _SA ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
namespace MIPSComp
{
static u32 EvalOr(u32 a, u32 b) { return a | b; }
static u32 EvalEor(u32 a, u32 b) { return a ^ b; }
static u32 EvalAnd(u32 a, u32 b) { return a & b; }
static u32 EvalAdd(u32 a, u32 b) { return a + b; }
static u32 EvalSub(u32 a, u32 b) { return a - b; }
void Jit::Comp_IType(u32 op)
{
CONDITIONAL_DISABLE;
s32 simm = (s32)(s16)(op & 0xFFFF); // sign extension
u32 uimm = op & 0xFFFF;
u32 suimm = (u32)(s32)simm;
int rt = _RT;
int rs = _RS;
// noop, won't write to ZERO.
if (rt == 0)
return;
switch (op >> 26)
{
case 8: // same as addiu?
case 9: // R(rt) = R(rs) + simm; break; //addiu
{
if (gpr.IsImm(rs)) {
gpr.SetImm(rt, gpr.GetImm(rs) + simm);
} else {
gpr.MapDirtyIn(rt, rs);
ADDI(gpr.R(rt), gpr.R(rs), simm);
}
break;
}
case 15: // R(rt) = uimm << 16; //lui
gpr.SetImm(rt, uimm << 16);
break;
default:
Comp_Generic(op);
break;
}
}
void Jit::Comp_RType2(u32 op) {
Comp_Generic(op);
}
// Utilities to reduce duplicated code
void Jit::CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b)) {
DebugBreak();
}
void Jit::CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub) {
if (gpr.IsImm(rs) && gpr.IsImm(rt)) {
gpr.SetImm(rd, (*eval)(gpr.GetImm(rs), gpr.GetImm(rt)));
} else if (gpr.IsImm(rt)) {
u32 rtImm = gpr.GetImm(rt);
gpr.MapDirtyIn(rd, rs);
MOVI2R(SREG, rtImm);
(this->*arith)(gpr.R(rd), gpr.R(rs), SREG);
} else if (gpr.IsImm(rs)) {
u32 rsImm = gpr.GetImm(rs);
gpr.MapDirtyIn(rd, rt);
// TODO: Special case when rsImm can be represented as an Operand2
MOVI2R(SREG, rsImm);
(this->*arith)(gpr.R(rd), SREG, gpr.R(rt));
} else {
// Generic solution
gpr.MapDirtyInIn(rd, rs, rt);
(this->*arith)(gpr.R(rd), gpr.R(rs), gpr.R(rt));
}
}
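// A small worked example of the folding paths (hypothetical MIPS sequence,
// assuming the register cache tracks $zero and known constants as immediates):
//   addiu v0, zero, 2   -> gpr.SetImm(v0, 2), no PPC code emitted
//   addiu v1, zero, 3   -> gpr.SetImm(v1, 3), no PPC code emitted
//   addu  a0, v0, v1    -> both operands known: gpr.SetImm(a0, 5), still no code;
//                          a0 is only materialized when a non-constant use needs it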
void Jit::Comp_RType3(u32 op) {
CONDITIONAL_DISABLE;
int rt = _RT;
int rs = _RS;
int rd = _RD;
// noop, won't write to ZERO.
if (rd == 0)
return;
switch (op & 63)
{
case 32: //R(rd) = R(rs) + R(rt); break; //add
case 33: //R(rd) = R(rs) + R(rt); break; //addu
// Some optimized special cases
if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) {
gpr.MapDirtyIn(rd, rt);
MR(gpr.R(rd), gpr.R(rt));
} else if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {
gpr.MapDirtyIn(rd, rs);
MR(gpr.R(rd), gpr.R(rs));
} else {
CompType3(rd, rs, rt, &PPCXEmitter::ADD, &EvalAdd);
}
break;
case 34: //R(rd) = R(rs) - R(rt); break; //sub
case 35: //R(rd) = R(rs) - R(rt); break; //subu
CompType3(rd, rs, rt, &PPCXEmitter::SUB, &EvalSub, true);
break;
case 36: //R(rd) = R(rs) & R(rt); break; //and
CompType3(rd, rs, rt, &PPCXEmitter::AND, &EvalAnd);
break;
case 37: //R(rd) = R(rs) | R(rt); break; //or
CompType3(rd, rs, rt, &PPCXEmitter::OR, &EvalOr);
break;
case 38: //R(rd) = R(rs) ^ R(rt); break; //xor/eor
CompType3(rd, rs, rt, &PPCXEmitter::XOR, &EvalEor);
break;
default:
Comp_Generic(op);
break;
}
}
void Jit::Comp_ShiftType(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Allegrex(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Allegrex2(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_MulDivType(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Special3(u32 op) {
Comp_Generic(op);
}
}

View File

@ -0,0 +1,434 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/Reporting.h"
#include "Core/HLE/HLE.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
#define LOOPOPTIMIZATION 0
// We can disable nice delay slots.
#define CONDITIONAL_NICE_DELAYSLOT delaySlotIsNice = false;
// #define CONDITIONAL_NICE_DELAYSLOT ;
#define SHOW_JS_COMPILER_PC { printf("js.compilerPC: %08x\n", js.compilerPC); }
#define BRANCH_COMPILE_LOG { printf("JIT(%8x): %s => %d - %08x\n", (u32)GetCodePtr() ,__FUNCTION__, cc, js.compilerPC); }
using namespace MIPSAnalyst;
using namespace PpcGen;
namespace MIPSComp
{
void Jit::BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int rt = _RT;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC+4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rt, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)
{
gpr.MapReg(rs);
CMPLI(gpr.R(rs), 0);
}
else if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) // only these are easily 'flippable'
{
gpr.MapReg(rt);
CMPLI(gpr.R(rt), 0);
}
else
{
gpr.MapInIn(rs, rt);
CMPL(gpr.R(rs), gpr.R(rt));
}
PpcGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_Cond(cc);
}
else
{
FlushAll();
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
INFO_LOG(CPU, "targetAddr: %08x,js.compilerPC: %08x offset: %08x, op: %08x\n", targetAddr, js.compilerPC, offset, op);
// Take the branch
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC+8, 1);
js.compiling = false;
}
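// Rough layout of the emitted code for the non-likely case (sketch; cc is the
// opposite of the MIPS condition, so the conditional branch skips the taken exit):
//   <delay slot here if it doesn't touch rs/rt>
//   cmpl(i) rs, rt (or 0)
//   <delay slot here otherwise>
//   b<cc>   not_taken
//   <WriteExit(targetAddr)>        ; branch taken
// not_taken:
//   <WriteExit(compilerPC + 8)>    ; fall through
// For likely branches the delay slot is emitted after b<cc>, so it only runs when taken.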
void Jit::BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op&0xFFFF)<<2;
int rs = _RS;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
CMPI(gpr.R(rs), 0);
PpcGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
else
FlushAll();
ptr = B_Cond(cc);
}
else
{
FlushAll();
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
if (andLink)
{
//Break();
MOVI2R(SREG, js.compilerPC + 8);
STW(SREG, CTXREG, MIPS_REG_RA * 4);
}
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
}
void Jit::Comp_RelBranch(u32 op) {
// The CC flags here should be the opposite of the actual branch because they skip the branching action.
switch (op>>26)
{
case 4: BranchRSRTComp(op, _BNE, false); break;//beq
case 5: BranchRSRTComp(op, _BEQ, false); break;//bne
case 6: BranchRSZeroComp(op, _BGT, false, false); break;//blez
case 7: BranchRSZeroComp(op, _BLE, false, false); break;//bgtz
case 20: BranchRSRTComp(op, _BNE, true); break;//beql
case 21: BranchRSRTComp(op, _BEQ, true); break;//bnel
case 22: BranchRSZeroComp(op, _BGT, false, true); break;//blezl
case 23: BranchRSZeroComp(op, _BLE, false, true); break;//bgtzl
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_RelBranchRI(u32 op) {
switch ((op >> 16) & 0x1F)
{
case 0: BranchRSZeroComp(op, _BGE, false, false); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltz
case 1: BranchRSZeroComp(op, _BLT, false, false); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgez
case 2: BranchRSZeroComp(op, _BGE, false, true); break; //if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 8; break;//bltzl
case 3: BranchRSZeroComp(op, _BLT, false, true); break; //if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 8; break;//bgezl
case 16: BranchRSZeroComp(op, _BGE, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else PC += 4; break;//bltzal
case 17: BranchRSZeroComp(op, _BLT, true, false); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else PC += 4; break;//bgezal
case 18: BranchRSZeroComp(op, _BGE, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) < 0) DelayBranchTo(addr); else SkipLikely(); break;//bltzall
case 19: BranchRSZeroComp(op, _BLT, true, true); break; //R(MIPS_REG_RA) = PC + 8; if ((s32)R(rs) >= 0) DelayBranchTo(addr); else SkipLikely(); break;//bgezall
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
void Jit::BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
LWZ(SREG, CTXREG, offsetof(MIPSState, fpcond));
// change CR0
ANDI(SREG, SREG, 1);
PpcGen::FixupBranch ptr;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = B_Cond(cc);
}
else
{
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
// Take the branch
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
}
void Jit::Comp_FPUBranch(u32 op) {
switch((op >> 16) & 0x1f)
{
case 0: BranchFPFlag(op, _BNE, false); break; // bc1f
case 1: BranchFPFlag(op, _BEQ, false); break; // bc1t
case 2: BranchFPFlag(op, _BNE, true); break; // bc1fl
case 3: BranchFPFlag(op, _BEQ, true); break; // bc1tl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;
}
js.compiling = false;
}
// If likely is set, discard the branch slot if NOT taken.
void Jit::BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely)
{
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int offset = (signed short)(op & 0xFFFF) << 2;
u32 targetAddr = js.compilerPC + offset + 4;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceVFPU(op, delaySlotOp);
CONDITIONAL_NICE_DELAYSLOT;
if (!likely && delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
int imm3 = (op >> 18) & 7;
MOVI2R(SREG, (u32)&(mips_->vfpuCtrl[VFPU_CTRL_CC]));
LWZ(SREG, SREG, 0);
// change CR0
ANDI(SREG, SREG, 1 << imm3);
PpcGen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
{
if (!delaySlotIsNice)
CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
ptr = B_Cond(cc);
}
else
{
ptr = B_Cond(cc);
CompileDelaySlot(DELAYSLOT_FLUSH);
}
js.inDelaySlot = false;
// Take the branch
WriteExit(targetAddr, 0);
SetJumpTarget(ptr);
// Not taken
WriteExit(js.compilerPC + 8, 1);
js.compiling = false;
}
void Jit::Comp_VBranch(u32 op) {
switch ((op >> 16) & 3)
{
case 0: BranchVFPUFlag(op, _BNE, false); break; // bvf
case 1: BranchVFPUFlag(op, _BEQ, false); break; // bvt
case 2: BranchVFPUFlag(op, _BNE, true); break; // bvfl
case 3: BranchVFPUFlag(op, _BEQ, true); break; // bvtl
}
js.compiling = false;
}
void Jit::Comp_Jump(u32 op) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in Jump delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
u32 off = ((op & 0x03FFFFFF) << 2);
u32 targetAddr = (js.compilerPC & 0xF0000000) | off;
switch (op >> 26)
{
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, 0);
break;
case 3: //jal
//Break();
gpr.MapReg(MIPS_REG_RA, MAP_NOINIT | MAP_DIRTY);
MOVI2R(gpr.R(MIPS_REG_RA), js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
WriteExit(targetAddr, 0);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
js.compiling = false;
}
void Jit::Comp_JumpReg(u32 op) {
if (js.inDelaySlot) {
ERROR_LOG_REPORT(JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", js.compilerPC, js.blockStart);
return;
}
int rs = _RS;
u32 delaySlotOp = Memory::ReadUnchecked_U32(js.compilerPC + 4);
bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
CONDITIONAL_NICE_DELAYSLOT;
if (IsSyscall(delaySlotOp)) {
gpr.MapReg(rs);
PPCReg mRs = gpr.R(rs);
MR(FLAGREG, mRs);
MovToPC(FLAGREG); // For syscall to be able to return.
CompileDelaySlot(DELAYSLOT_FLUSH);
return; // Syscall wrote exit code.
} else if (delaySlotIsNice) {
CompileDelaySlot(DELAYSLOT_NICE);
gpr.MapReg(rs);
PPCReg mRs = gpr.R(rs);
MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
FlushAll();
} else {
// Delay slot
gpr.MapReg(rs);
PPCReg mRs = gpr.R(rs);
MR(FLAGREG, mRs); // Save the destination address through the delay slot. Could use isNice to avoid when the jit is fully implemented
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
}
switch (op & 0x3f)
{
case 8: //jr
break;
case 9: //jalr
// mips->reg = js.compilerPC + 8;
//Break();
MOVI2R(SREG, js.compilerPC + 8);
STW(SREG, CTXREG, MIPS_REG_RA * 4);
break;
default:
_dbg_assert_msg_(CPU,0,"Trying to compile instruction that can't be compiled");
break;
}
WriteExitDestInR(FLAGREG);
js.compiling = false;
}
void Jit::Comp_Syscall(u32 op) {
FlushAll();
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDownCount(offset);
js.downcountAmount = -offset;
// CallSyscall(op);
MOVI2R(R3, op);
SaveDowncount(DCNTREG);
QuickCallFunction((void *)&CallSyscall);
RestoreDowncount(DCNTREG);
WriteSyscallExit();
js.compiling = false;
}
void Jit::Comp_Break(u32 op) {
Comp_Generic(op);
WriteSyscallExit();
js.compiling = false;
}
}

View File

@ -0,0 +1,42 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
void Jit::Comp_FPULS(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_FPUComp(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_FPU3op(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_FPU2op(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_mxc1(u32 op) {
Comp_Generic(op);
}
}

View File

@ -0,0 +1,142 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#define _RS ((op>>21) & 0x1F)
#define _RT ((op>>16) & 0x1F)
#define _RD ((op>>11) & 0x1F)
#define _FS ((op>>11) & 0x1F)
#define _FT ((op>>16) & 0x1F)
#define _FD ((op>>6 ) & 0x1F)
#define _POS ((op>>6 ) & 0x1F)
#define _SIZE ((op>>11 ) & 0x1F)
// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.
//#define CONDITIONAL_DISABLE { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE ;
#define DISABLE { Comp_Generic(op); return; }
using namespace PpcGen;
namespace MIPSComp
{
void Jit::SetRegToEffectiveAddress(PpcGen::PPCReg r, int rs, s16 offset) {
if (offset) {
ADDI(SREG, gpr.R(rs), offset);
RLWINM(SREG, SREG, 0, 2, 31); // &= 0x3FFFFFFF
} else {
RLWINM(SREG, gpr.R(rs), 0, 2, 31); // &= 0x3FFFFFFF
}
}
void Jit::Comp_ITypeMem(u32 op) {
CONDITIONAL_DISABLE;
int offset = (signed short)(op&0xFFFF);
bool load = false;
int rt = _RT;
int rs = _RS;
int o = op>>26;
if (((op >> 29) & 1) == 0 && rt == 0) {
// Don't load anything into $zr
return;
}
u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
bool doCheck = false;
switch (o)
{
case 32: //lb
case 33: //lh
case 35: //lw
case 36: //lbu
case 37: //lhu
load = true;
case 40: //sb
case 41: //sh
case 43: //sw
if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
// We can compute the full address at compile time. Kickass.
u32 addr = iaddr & 0x3FFFFFFF;
// Must be OK even if rs == rt since we have the value from imm already.
gpr.MapReg(rt, load ? MAP_NOINIT | MAP_DIRTY : 0);
MOVI2R(SREG, addr);
} else {
_dbg_assert_msg_(JIT, !gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
SetRegToEffectiveAddress(SREG, rs, offset);
}
switch (o)
{
// Load
case 32: //lb
LBZX(gpr.R(rt), BASEREG, SREG);
EXTSB(gpr.R(rt), gpr.R(rt));
break;
case 33: //lh
LHBRX(gpr.R(rt), BASEREG, SREG);
EXTSH(gpr.R(rt), gpr.R(rt));
break;
case 35: //lw
LWBRX(gpr.R(rt), BASEREG, SREG);
break;
case 36: //lbu
LBZX (gpr.R(rt), BASEREG, SREG);
break;
case 37: //lhu
LHBRX (gpr.R(rt), BASEREG, SREG);
break;
// Store
case 40: //sb
STBX (gpr.R(rt), BASEREG, SREG);
break;
case 41: //sh
STHBRX(gpr.R(rt), BASEREG, SREG);
break;
case 43: //sw
STWBRX(gpr.R(rt), BASEREG, SREG);
break;
}
break;
case 34: //lwl
case 38: //lwr
load = true;
case 42: //swl
case 46: //swr
if (!js.inDelaySlot) {
// Optimisation: Combine to single unaligned load/store
bool isLeft = (o == 34 || o == 42);
u32 nextOp = Memory::Read_Instruction(js.compilerPC + 4);
// Find a matching shift in opposite direction with opposite offset.
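// Worked example (hypothetical registers): lwl $t0, 3($a0) is opcode 34 with
// offset 3; op + (4<<26) - 3 gives opcode 38 with offset 0, i.e. the matching
// lwr $t0, 0($a0). The pair is then folded into a single lw $t0, 0($a0).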
if (nextOp == (isLeft ? (op + (4<<26) - 3)
: (op - (4<<26) + 3)))
{
EatInstruction(nextOp);
nextOp = ((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x3FFFFFF); //lw, sw
Comp_ITypeMem(nextOp);
return;
}
}
DISABLE; // Disabled until crashes are resolved.
break;
default:
Comp_Generic(op);
return ;
}
}
}

View File

@ -0,0 +1,139 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
namespace MIPSComp
{
void Jit::Comp_SV(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_SVQ(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VPFX(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VVectorInit(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VMatrixInit(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VDot(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VecDo3(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VV2Op(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Mftv(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmtvc(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmmov(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VScl(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmmul(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vmscl(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vtfm(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VHdp(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VCrs(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VDet(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vi2x(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vx2i(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vf2i(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vi2f(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vcst(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vhoriz(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VRot(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_VIdt(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vcmp(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vcmov(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Viim(u32 op) {
Comp_Generic(op);
}
void Jit::Comp_Vfim(u32 op) {
Comp_Generic(op);
}
}

235
Core/MIPS/PPC/PpcJit.cpp Normal file
View File

@ -0,0 +1,235 @@
#include "Common/ChunkFile.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "PpcRegCache.h"
#include "ppcEmitter.h"
#include "PpcJit.h"
#include <ppcintrinsics.h>
using namespace PpcGen;
extern volatile CoreState coreState;
namespace MIPSComp
{
static u32 delaySlotFlagsValue;
void Jit::CompileDelaySlot(int flags)
{
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt into the
// delay slot, we're screwed.
if (flags & DELAYSLOT_SAFE) {
// Save flags register
MOVI2R(SREG, (u32)&delaySlotFlagsValue);
STW(FLAGREG, SREG);
}
js.inDelaySlot = true;
u32 op = Memory::Read_Instruction(js.compilerPC + 4);
MIPSCompileOp(op);
js.inDelaySlot = false;
if (flags & DELAYSLOT_FLUSH)
FlushAll();
if (flags & DELAYSLOT_SAFE) {
// Restore flags register
MOVI2R(SREG, (u32)&delaySlotFlagsValue);
LWZ(FLAGREG, SREG);
}
}
void Jit::Compile(u32 em_address)
{
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
{
ClearCache();
}
int block_num = blocks.AllocateBlock(em_address);
JitBlock *b = blocks.GetBlock(block_num);
DoJit(em_address, b);
blocks.FinalizeBlock(block_num, jo.enableBlocklink);
// Drat. The VFPU hit an uneaten prefix at the end of a block.
if (js.startDefaultPrefix && js.MayHavePrefix())
{
js.startDefaultPrefix = false;
// Our assumptions are all wrong so it's clean-slate time.
ClearCache();
// Let's try that one more time. We won't get back here because we toggled the value.
Compile(em_address);
}
}
void Jit::MovFromPC(PPCReg r) {
LWZ(r, CTXREG, offsetof(MIPSState, pc));
}
void Jit::MovToPC(PPCReg r) {
STW(r, CTXREG, offsetof(MIPSState, pc));
}
void Jit::SaveDowncount(PPCReg r) {
STW(r, CTXREG, offsetof(MIPSState, downcount));
}
void Jit::RestoreDowncount(PPCReg r) {
LWZ(r, CTXREG, offsetof(MIPSState, downcount));
}
static void ShowDownCount() {
if (currentMIPS->downcount<0) {
//ERROR_LOG(DYNA_REC, "MIPSState, downcount %08x", currentMIPS->downcount);
Crash();
}
}
void Jit::WriteDownCount(int offset)
{
// don't know if the result is correct
int theDowncount = js.downcountAmount + offset;
if (jo.downcountInRegister) {
// DCNTREG = DCNTREG - theDowncount;
MOVI2R(SREG, theDowncount);
SUBF(DCNTREG, SREG, DCNTREG, 1);
STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
} else {
// DCNTREG = MIPSState->downcount - theDowncount;
MOVI2R(SREG, theDowncount);
LWZ(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
SUBF(DCNTREG, SREG, DCNTREG, 1);
STW(DCNTREG, CTXREG, offsetof(MIPSState, downcount));
}
//QuickCallFunction(ShowDownCount);
CMPI(DCNTREG, 0);
}
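// For reference, subf computes (rB - rA), so SUBF(DCNTREG, SREG, DCNTREG)
// above is DCNTREG - theDowncount, i.e. roughly:
//   mips->downcount -= js.downcountAmount + offset;
// and the trailing CMPI leaves CR0 set so callers can branch when it goes negative.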
void Jit::Comp_Generic(u32 op) {
FlushAll();
// basic jit !!
MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
if (func)
{
// Save mips PC and cycles
SaveDowncount(DCNTREG);
// call interpreted function
MOVI2R(R3, op);
QuickCallFunction((void *)func);
// restore pc and cycles
RestoreDowncount(DCNTREG);
}
// Might have eaten prefixes, hard to tell...
if ((MIPSGetInfo(op) & IS_VFPU) != 0)
js.PrefixStart();
}
void Jit::EatInstruction(u32 op) {
u32 info = MIPSGetInfo(op);
_dbg_assert_msg_(JIT, !(info & DELAYSLOT), "Never eat a branch op.");
_dbg_assert_msg_(JIT, !js.inDelaySlot, "Never eat an instruction inside a delayslot.");
js.compilerPC += 4;
js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
void Jit::Comp_RunBlock(u32 op) {
// This shouldn't be necessary, the dispatcher should catch us before we get here.
ERROR_LOG(DYNA_REC, "Comp_RunBlock should never be reached!");
}
void Jit::Comp_DoNothing(u32 op) {
}
void Jit::FlushAll()
{
gpr.FlushAll();
//fpr.FlushAll();
//FlushPrefixV();
}
void Jit::ClearCache() {
blocks.Clear();
ClearCodeSpace();
GenerateFixedCode();
}
void Jit::ClearCacheAt(u32 em_address) {
ClearCache();
}
Jit::Jit(MIPSState *mips) : blocks(mips, this), gpr(mips, &jo),mips_(mips)
{
blocks.Init();
gpr.SetEmitter(this);
AllocCodeSpace(1024 * 1024 * 16); // 16MB of JIT code space (the 32MB limit in the ARM version comes from ARM branch reach and doesn't apply here).
GenerateFixedCode();
js.startDefaultPrefix = true;
}
void Jit::RunLoopUntil(u64 globalticks) {
#ifdef _XBOX
// force stack alignment
_alloca(8*1024);
#endif
// Run the compiled code
((void (*)())enterCode)();
}
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have a conditional that sets PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Jit::WriteExit(u32 destination, int exit_num)
{
WriteDownCount();
//If nobody has taken care of this yet (this can be removed when all branches are done)
JitBlock *b = js.curBlock;
b->exitAddress[exit_num] = destination;
b->exitPtrs[exit_num] = GetWritableCodePtr();
// Link opportunity!
int block = blocks.GetBlockNumberFromStartAddress(destination);
if (block >= 0 && jo.enableBlocklink) {
// It exists! Joy of joy!
B(blocks.GetBlock(block)->checkedEntry);
b->linkStatus[exit_num] = true;
} else {
MOVI2R(SREG, destination);
B((const void *)dispatcherPCInR0);
}
}
void Jit::WriteExitDestInR(PPCReg Reg)
{
//Break();
MovToPC(Reg);
WriteDownCount();
// TODO: shouldn't need an indirect branch here...
B((const void *)dispatcher);
}
void Jit::WriteSyscallExit()
{
WriteDownCount();
B((const void *)dispatcherCheckCoreState);
}
}

284
Core/MIPS/PPC/PpcJit.h Normal file
View File

@ -0,0 +1,284 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#pragma once
#include "../../../Globals.h"
#include "Core/MIPS/JitCommon/JitBlockCache.h"
#include "Core/MIPS/PPC/PpcRegCache.h"
#include "Core/MIPS/MIPS.h"
#include <ppcEmitter.h>
namespace MIPSComp
{
struct PpcJitOptions
{
PpcJitOptions()
{
enableBlocklink = true;
downcountInRegister = true;
}
bool enableBlocklink;
bool downcountInRegister;
};
struct PpcJitState
{
enum PrefixState
{
PREFIX_UNKNOWN = 0x00,
PREFIX_KNOWN = 0x01,
PREFIX_DIRTY = 0x10,
PREFIX_KNOWN_DIRTY = 0x11,
};
u32 compilerPC;
u32 blockStart;
bool cancel;
bool inDelaySlot;
int downcountAmount;
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
JitBlock *curBlock;
// VFPU prefix magic
bool startDefaultPrefix;
u32 prefixS;
u32 prefixT;
u32 prefixD;
PrefixState prefixSFlag;
PrefixState prefixTFlag;
PrefixState prefixDFlag;
void PrefixStart() {
if (startDefaultPrefix) {
EatPrefix();
} else {
PrefixUnknown();
}
}
void PrefixUnknown() {
prefixSFlag = PREFIX_UNKNOWN;
prefixTFlag = PREFIX_UNKNOWN;
prefixDFlag = PREFIX_UNKNOWN;
}
bool MayHavePrefix() const {
if (HasUnknownPrefix()) {
return true;
} else if (prefixS != 0xE4 || prefixT != 0xE4 || prefixD != 0) {
return true;
} else if (VfpuWriteMask() != 0) {
return true;
}
return false;
}
bool HasUnknownPrefix() const {
if (!(prefixSFlag & PREFIX_KNOWN) || !(prefixTFlag & PREFIX_KNOWN) || !(prefixDFlag & PREFIX_KNOWN)) {
return true;
}
return false;
}
bool HasNoPrefix() const {
return (prefixDFlag & PREFIX_KNOWN) && (prefixSFlag & PREFIX_KNOWN) && (prefixTFlag & PREFIX_KNOWN) && (prefixS == 0xE4 && prefixT == 0xE4 && prefixD == 0);
}
void EatPrefix() {
if ((prefixSFlag & PREFIX_KNOWN) == 0 || prefixS != 0xE4) {
prefixSFlag = PREFIX_KNOWN_DIRTY;
prefixS = 0xE4;
}
if ((prefixTFlag & PREFIX_KNOWN) == 0 || prefixT != 0xE4) {
prefixTFlag = PREFIX_KNOWN_DIRTY;
prefixT = 0xE4;
}
if ((prefixDFlag & PREFIX_KNOWN) == 0 || prefixD != 0x0 || VfpuWriteMask() != 0) {
prefixDFlag = PREFIX_KNOWN_DIRTY;
prefixD = 0x0;
}
}
u8 VfpuWriteMask() const {
_assert_(prefixDFlag & PREFIX_KNOWN);
return (prefixD >> 8) & 0xF;
}
bool VfpuWriteMask(int i) const {
_assert_(prefixDFlag & PREFIX_KNOWN);
return (prefixD >> (8 + i)) & 1;
}
};
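// Editorial sketch, not part of the original patch: 0xE4 is the identity swizzle.
// The low byte of a source prefix packs four 2-bit element selectors, LSB first,
// so 0xE4 selects elements 0,1,2,3 unchanged; prefixD == 0 means no write mask and
// no saturation, which is why HasNoPrefix() tests for exactly (0xE4, 0xE4, 0).
// A hypothetical decoder, for illustration only:
inline int VfpuPrefixSelect(u32 prefix, int element) {
	return (prefix >> (element * 2)) & 3;   // 0xE4 -> 0, 1, 2, 3
}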
enum CompileDelaySlotFlags
{
// Easy, nothing extra.
DELAYSLOT_NICE = 0,
// Flush registers after delay slot.
DELAYSLOT_FLUSH = 1,
// Preserve flags.
DELAYSLOT_SAFE = 2,
// Flush registers after and preserve flags.
DELAYSLOT_SAFE_FLUSH = DELAYSLOT_FLUSH | DELAYSLOT_SAFE,
};
class Jit: public PpcGen::PPCXCodeBlock
{
protected:
JitBlockCache blocks;
public:
Jit(MIPSState *mips);
// Compiled ops should ignore delay slots
// the compiler will take care of them by itself
// OR NOT
void Comp_Generic(u32 op);
void EatInstruction(u32 op);
void Comp_RunBlock(u32 op);
// TODO: Eat VFPU prefixes here.
void EatPrefix() { }
// Ops
void Comp_ITypeMem(u32 op);
void Comp_RelBranch(u32 op);
void Comp_RelBranchRI(u32 op);
void Comp_FPUBranch(u32 op);
void Comp_FPULS(u32 op);
void Comp_FPUComp(u32 op);
void Comp_Jump(u32 op);
void Comp_JumpReg(u32 op);
void Comp_Syscall(u32 op);
void Comp_Break(u32 op);
void Comp_IType(u32 op);
void Comp_RType2(u32 op);
void Comp_RType3(u32 op);
void Comp_ShiftType(u32 op);
void Comp_Allegrex(u32 op);
void Comp_Allegrex2(u32 op);
void Comp_VBranch(u32 op);
void Comp_MulDivType(u32 op);
void Comp_Special3(u32 op);
void Comp_FPU3op(u32 op);
void Comp_FPU2op(u32 op);
void Comp_mxc1(u32 op);
void Comp_DoNothing(u32 op);
void Comp_SV(u32 op);
void Comp_SVQ(u32 op);
void Comp_VPFX(u32 op);
void Comp_VVectorInit(u32 op);
void Comp_VMatrixInit(u32 op);
void Comp_VDot(u32 op);
void Comp_VecDo3(u32 op);
void Comp_VV2Op(u32 op);
void Comp_Mftv(u32 op);
void Comp_Vmtvc(u32 op);
void Comp_Vmmov(u32 op);
void Comp_VScl(u32 op);
void Comp_Vmmul(u32 op);
void Comp_Vmscl(u32 op);
void Comp_Vtfm(u32 op);
void Comp_VHdp(u32 op);
void Comp_VCrs(u32 op);
void Comp_VDet(u32 op);
void Comp_Vi2x(u32 op);
void Comp_Vx2i(u32 op);
void Comp_Vf2i(u32 op);
void Comp_Vi2f(u32 op);
void Comp_Vcst(u32 op);
void Comp_Vhoriz(u32 op);
void Comp_VRot(u32 op);
void Comp_VIdt(u32 op);
void Comp_Vcmp(u32 op);
void Comp_Vcmov(u32 op);
void Comp_Viim(u32 op);
void Comp_Vfim(u32 op);
// Utility compilation functions
void BranchFPFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
void BranchVFPUFlag(u32 op, PpcGen::FixupBranchType cc, bool likely);
void BranchRSZeroComp(u32 op, PpcGen::FixupBranchType cc, bool andLink, bool likely);
void BranchRSRTComp(u32 op, PpcGen::FixupBranchType cc, bool likely);
void SetRegToEffectiveAddress(PpcGen::PPCReg r, int rs, s16 offset);
// Utilities to reduce duplicated code
void CompImmLogic(int rs, int rt, u32 uimm, void (PPCXEmitter::*arith)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b));
void CompType3(int rd, int rs, int rt, void (PPCXEmitter::*arithOp2)(PPCReg Rd, PPCReg Ra, PPCReg Rb), u32 (*eval)(u32 a, u32 b), bool isSub = false);
// flush regs
void FlushAll();
void WriteDownCount(int offset = 0);
void MovFromPC(PpcGen::PPCReg r);
void MovToPC(PpcGen::PPCReg r);
void SaveDowncount(PpcGen::PPCReg r);
void RestoreDowncount(PpcGen::PPCReg r);
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInR(PPCReg Reg);
void WriteSyscallExit();
void ClearCache();
void ClearCacheAt(u32 em_address);
void RunLoopUntil(u64 globalticks);
void GenerateFixedCode();
void DumpJit();
void CompileDelaySlot(int flags);
void Compile(u32 em_address); // Compiles a block at current MIPS PC
const u8 *DoJit(u32 em_address, JitBlock *b);
PpcJitOptions jo;
PpcJitState js;
PpcRegCache gpr;
//PpcRegCacheFPU fpr;
MIPSState *mips_;
JitBlockCache *GetBlockCache() { return &blocks; }
public:
// Code pointers
const u8 *enterCode;
const u8 *outerLoop;
const u8 *outerLoopPCInR0;
const u8 *dispatcherCheckCoreState;
const u8 *dispatcherPCInR0;
const u8 *dispatcher;
const u8 *dispatcherNoCheck;
const u8 *breakpointBailout;
};
typedef void (Jit::*MIPSCompileFunc)(u32 opcode);
} // namespace MIPSComp
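The MIPSCompileFunc member-pointer type is what an opcode decode table would hold. A hypothetical dispatch, purely for illustration (the table contents and helper names are not part of this patch), could look like:
#include "Core/MIPS/PPC/PpcJit.h"

using namespace MIPSComp;

// Illustrative only: a real table would cover the full opcode space.
static const MIPSCompileFunc kSketchTable[] = {
	&Jit::Comp_RType3,      // e.g. ADDU/SUBU style ops
	&Jit::Comp_ShiftType,   // e.g. SLL/SRL style ops
	&Jit::Comp_ITypeMem,    // e.g. LW/SW style ops
};

static void SketchCompileOne(Jit *jit, int index, u32 op) {
	(jit->*kSketchTable[index])(op);   // member-function-pointer dispatch
}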

313
Core/MIPS/PPC/PpcRegCache.cpp Normal file
View File

@@ -0,0 +1,313 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <PpcEmitter.h>
#include "PpcRegCache.h"
#include "PpcJit.h"
using namespace PpcGen;
PpcRegCache::PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options) : mips_(mips), options_(options) {
}
void PpcRegCache::Init(PPCXEmitter *emitter) {
emit_ = emitter;
}
void PpcRegCache::Start(MIPSAnalyst::AnalysisResults &stats) {
for (int i = 0; i < NUM_PPCREG; i++) {
ar[i].mipsReg = -1;
ar[i].isDirty = false;
}
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].loc = ML_MEM;
mr[i].reg = INVALID_REG;
mr[i].imm = -1;
mr[i].spillLock = false;
}
}
const PPCReg *PpcRegCache::GetMIPSAllocationOrder(int &count) {
// Note: the reservations here mirror PpcRegCache.h. R14-R17 hold the MIPS context,
// downcount, code and base pointers, and R18 (FLAGREG) is used to preserve flags in
// nasty branches, so none of them are handed out. R19-R31 are available for MIPS
// register allocation; the low, volatile registers are left for ABI and scratch use.
// (Both branches below currently return the same order.)
if (options_->downcountInRegister) {
static const PPCReg allocationOrder[] = {
/*R14, R15, R16, R17, R18, */R19,
R20, R21, R22, R23, R24, R25,
R26, R27, R28, R29, R30, R31,
};
count = sizeof(allocationOrder) / sizeof(allocationOrder[0]);
return allocationOrder;
} else {
static const PPCReg allocationOrder2[] = {
/*R14, R15, R16, R17, R18,*/ R19,
R20, R21, R22, R23, R24, R25,
R26, R27, R28, R29, R30, R31,
};
count = sizeof(allocationOrder2) / sizeof(allocationOrder2[0]);
return allocationOrder2;
}
}
void PpcRegCache::FlushBeforeCall() {
// With the current allocation order (R19-R31, callee-saved on PPC) no mapped register is volatile, so there is nothing to flush before a call.
/*
FlushPpcReg(R2);
FlushPpcReg(R3);
FlushPpcReg(R12);
*/
}
// TODO: Somewhat smarter spilling - currently simply spills the first available, should do
// round robin or FIFO or something.
PPCReg PpcRegCache::MapReg(MIPSReg mipsReg, int mapFlags) {
// Let's see if it's already mapped. If so we just need to update the dirty flag.
// We don't need to check for ML_NOINIT because we assume that anyone who maps
// with that flag immediately writes a "known" value to the register.
if (mr[mipsReg].loc == ML_PPCREG) {
if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
ERROR_LOG(HLE, "Register mapping out of sync! %i", mipsReg);
}
if (mapFlags & MAP_DIRTY) {
ar[mr[mipsReg].reg].isDirty = true;
}
return (PPCReg)mr[mipsReg].reg;
}
// Okay, not mapped, so we need to allocate a PPC register.
int allocCount;
const PPCReg *allocOrder = GetMIPSAllocationOrder(allocCount);
allocate:
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i];
if (ar[reg].mipsReg == -1) {
// That means it's free. Grab it, and load the value into it (if requested).
ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
if (!(mapFlags & MAP_NOINIT)) {
if (mr[mipsReg].loc == ML_MEM) {
if (mipsReg != 0) {
emit_->LWZ((PPCReg)reg, CTXREG, GetMipsRegOffset(mipsReg));
} else {
// If we get a request to load the zero register, at least we won't spend
// time on a memory access...
emit_->MOVI2R((PPCReg)reg, 0);
}
} else if (mr[mipsReg].loc == ML_IMM) {
emit_->MOVI2R((PPCReg)reg, mr[mipsReg].imm);
ar[reg].isDirty = true; // IMM is always dirty.
}
}
ar[reg].mipsReg = mipsReg;
mr[mipsReg].loc = ML_PPCREG;
mr[mipsReg].reg = (PPCReg)reg;
return (PPCReg)reg;
}
}
// Still nothing. Let's spill a reg and goto 10.
// TODO: Use age or something to choose which register to spill?
// TODO: Spill dirty regs first? or opposite?
int bestToSpill = -1;
for (int i = 0; i < allocCount; i++) {
int reg = allocOrder[i];
if (ar[reg].mipsReg != -1 && mr[ar[reg].mipsReg].spillLock)
continue;
bestToSpill = reg;
break;
}
if (bestToSpill != -1) {
// ERROR_LOG(JIT, "Out of registers at PC %08x - spills register %i.", mips_->pc, bestToSpill);
FlushPpcReg((PPCReg)bestToSpill);
goto allocate;
}
// Uh oh, we have all of them spill-locked...
ERROR_LOG(JIT, "Out of spillable registers at PC %08x!!!", mips_->pc);
return INVALID_REG;
}
void PpcRegCache::MapInIn(MIPSReg rd, MIPSReg rs) {
SpillLock(rd, rs);
MapReg(rd);
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) {
SpillLock(rd, rs);
bool load = !avoidLoad || rd == rs;
MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT));
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
SpillLock(rd, rs, rt);
bool load = !avoidLoad || (rd == rs || rd == rt);
MapReg(rd, MAP_DIRTY | (load ? 0 : MAP_NOINIT));
MapReg(rt);
MapReg(rs);
ReleaseSpillLocks();
}
void PpcRegCache::MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
SpillLock(rd1, rd2, rs, rt);
bool load1 = !avoidLoad || (rd1 == rs || rd1 == rt);
bool load2 = !avoidLoad || (rd2 == rs || rd2 == rt);
MapReg(rd1, MAP_DIRTY | (load1 ? 0 : MAP_NOINIT));
MapReg(rd2, MAP_DIRTY | (load2 ? 0 : MAP_NOINIT));
MapReg(rt);
MapReg(rs);
ReleaseSpillLocks();
}
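// Editorial sketch, not part of the original patch: the intended pattern is to map
// the MIPS operands first, then emit PPC instructions against the returned host
// registers. An "addu-like" op is shown; the helper name and parameters are illustrative.
static void SketchCompileAddLike(PpcRegCache &gpr, PPCXEmitter &emit, int rd, int rs, int rt) {
	gpr.MapDirtyInIn(rd, rs, rt);                 // rd is written, rs/rt are read
	emit.ADD(gpr.R(rd), gpr.R(rs), gpr.R(rt));    // host-side rd = rs + rt
}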
void PpcRegCache::FlushPpcReg(PPCReg r) {
if (ar[r].mipsReg == -1) {
// Nothing to do, reg not mapped.
return;
}
if (ar[r].mipsReg != -1) {
if (ar[r].isDirty && mr[ar[r].mipsReg].loc == ML_PPCREG)
emit_->STW(r, CTXREG, GetMipsRegOffset(ar[r].mipsReg));
// IMMs won't be in a PPC reg.
mr[ar[r].mipsReg].loc = ML_MEM;
mr[ar[r].mipsReg].reg = INVALID_REG;
mr[ar[r].mipsReg].imm = 0;
} else {
ERROR_LOG(HLE, "Dirty but no mipsreg?");
}
ar[r].isDirty = false;
ar[r].mipsReg = -1;
}
void PpcRegCache::FlushR(MIPSReg r) {
switch (mr[r].loc) {
case ML_IMM:
// IMM is always "dirty".
emit_->MOVI2R(SREG, mr[r].imm);
emit_->STW(SREG, CTXREG, GetMipsRegOffset(r));
break;
case ML_PPCREG:
if (mr[r].reg == INVALID_REG) {
ERROR_LOG(HLE, "FlushMipsReg: MipsReg had bad PpcReg");
}
if (ar[mr[r].reg].isDirty) {
emit_->STW((PPCReg)mr[r].reg, CTXREG, GetMipsRegOffset(r));
ar[mr[r].reg].isDirty = false;
}
ar[mr[r].reg].mipsReg = -1;
break;
case ML_MEM:
// Already there, nothing to do.
break;
default:
//BAD
break;
}
mr[r].loc = ML_MEM;
mr[r].reg = INVALID_REG;
mr[r].imm = 0;
}
void PpcRegCache::FlushAll() {
for (int i = 0; i < NUM_MIPSREG; i++) {
FlushR(i);
}
// Sanity check
for (int i = 0; i < NUM_PPCREG; i++) {
if (ar[i].mipsReg != -1) {
ERROR_LOG(JIT, "Flush fail: ar[%i].mipsReg=%i", i, ar[i].mipsReg);
}
}
}
void PpcRegCache::SetImm(MIPSReg r, u32 immVal) {
if (r == 0)
ERROR_LOG(JIT, "Trying to set immediate %08x to r0", immVal);
// Zap existing value if cached in a reg
if (mr[r].loc == ML_PPCREG) {
ar[mr[r].reg].mipsReg = -1;
ar[mr[r].reg].isDirty = false;
}
mr[r].loc = ML_IMM;
mr[r].imm = immVal;
mr[r].reg = INVALID_REG;
}
bool PpcRegCache::IsImm(MIPSReg r) const {
if (r == 0) return true;
return mr[r].loc == ML_IMM;
}
u32 PpcRegCache::GetImm(MIPSReg r) const {
if (r == 0) return 0;
if (mr[r].loc != ML_IMM) {
ERROR_LOG(JIT, "Trying to get imm from non-imm register %i", r);
}
return mr[r].imm;
}
int PpcRegCache::GetMipsRegOffset(MIPSReg r) {
if (r < 32)
return r * 4;
switch (r) {
case MIPSREG_HI:
return offsetof(MIPSState, hi);
case MIPSREG_LO:
return offsetof(MIPSState, lo);
}
ERROR_LOG(JIT, "bad mips register %i", r);
return 0; // or what?
}
void PpcRegCache::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
mr[r1].spillLock = true;
if (r2 != -1) mr[r2].spillLock = true;
if (r3 != -1) mr[r3].spillLock = true;
if (r4 != -1) mr[r4].spillLock = true;
}
void PpcRegCache::ReleaseSpillLocks() {
for (int i = 0; i < NUM_MIPSREG; i++) {
mr[i].spillLock = false;
}
}
void PpcRegCache::ReleaseSpillLock(MIPSReg reg) {
mr[reg].spillLock = false;
}
PPCReg PpcRegCache::R(int mipsReg) {
if (mr[mipsReg].loc == ML_PPCREG) {
return (PPCReg)mr[mipsReg].reg;
} else {
ERROR_LOG(JIT, "Reg %i not in ppc reg. compilerPC = %08x", mipsReg, compilerPC_);
return INVALID_REG; // BAAAD
}
}
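One consequence of the immediate tracking above is that some MIPS ops need no PPC code at all; a sketch (the helper is illustrative, not from this patch):
// LUI rt, imm: record the constant, emit nothing. FlushR()/FlushAll() will
// materialize it with MOVI2R + STW only if the value is still live at a flush.
static void SketchCompileLui(PpcRegCache &gpr, int rt, u16 imm) {
	gpr.SetImm(rt, (u32)imm << 16);
}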

156
Core/MIPS/PPC/PpcRegCache.h Normal file
View File

@@ -0,0 +1,156 @@
// Copyright (c) 2012- PPSSPP Project.
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.
// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
/**
PPC register cache, based on the ARM version.
**/
#pragma once
#include "../MIPS.h"
#include "../MIPSAnalyst.h"
#include "ppcEmitter.h"
using namespace PpcGen;
// Old ARM mapping, kept for reference:
//   R2 to R8: mapped MIPS regs, R9 = code pointers, R10 = MIPS context, R11 = base pointer
// PPC mapping used here:
// R18 to R31: mapped MIPS regs
// R14 = MIPS context
// R15 = downcount register
// R16 = code pointer
// R17 = base pointer
#if 1
#define CTXREG (R14)
#define DCNTREG (R15)
#define CODEREG (R16)
#define BASEREG (R17)
#else
#define CTXREG (R6)
#define DCNTREG (R7)
#define CODEREG (R8)
#define BASEREG (R9)
#endif
// Safe to use these as scratch regs?
#define SREG (R5)
#define FLAGREG (R18)
// Special MIPS registers:
enum {
MIPSREG_HI = 32,
MIPSREG_LO = 33,
TOTAL_MAPPABLE_MIPSREGS = 34,
};
typedef int MIPSReg;
struct RegPPC {
int mipsReg; // if -1, no mipsreg attached.
bool isDirty; // Should the register be written back?
};
enum RegMIPSLoc {
ML_IMM,
ML_PPCREG,
ML_MEM,
};
struct RegMIPS {
// Where is this MIPS register?
RegMIPSLoc loc;
// Data (only one of these is used, depending on loc. Could make a union).
u32 imm;
PPCReg reg; // reg index
bool spillLock; // if true, this register cannot be spilled.
// If loc == ML_MEM, it's back in its location in the CPU context struct.
};
#undef MAP_DIRTY
#undef MAP_NOINIT
// Initing is the default so the flag is reversed.
enum {
MAP_DIRTY = 1,
MAP_NOINIT = 2,
};
namespace MIPSComp {
struct PpcJitOptions;
}
class PpcRegCache
{
public:
PpcRegCache(MIPSState *mips, MIPSComp::PpcJitOptions *options);
~PpcRegCache() {}
void Init(PPCXEmitter *emitter);
void Start(MIPSAnalyst::AnalysisResults &stats);
// Protect the PPC register containing a MIPS register from spilling, to ensure that
// it stays allocated.
void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
void ReleaseSpillLock(MIPSReg reg);
void ReleaseSpillLocks();
void SetImm(MIPSReg reg, u32 immVal);
bool IsImm(MIPSReg reg) const;
u32 GetImm(MIPSReg reg) const;
// Returns a PPC register containing the requested MIPS register.
PPCReg MapReg(MIPSReg reg, int mapFlags = 0);
void MapInIn(MIPSReg rd, MIPSReg rs);
void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void MapDirtyDirtyInIn(MIPSReg rd1, MIPSReg rd2, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
void FlushPpcReg(PPCReg r);
void FlushR(MIPSReg r);
void FlushBeforeCall();
void FlushAll();
PPCReg R(int preg); // Returns a cached register
void SetEmitter(PPCXEmitter *emitter) { emit_ = emitter; }
// For better log output only.
void SetCompilerPC(u32 compilerPC) { compilerPC_ = compilerPC; }
int GetMipsRegOffset(MIPSReg r);
private:
const PPCReg *GetMIPSAllocationOrder(int &count);
MIPSState *mips_;
MIPSComp::PpcJitOptions *options_;
PPCXEmitter *emit_;
u32 compilerPC_;
enum {
NUM_PPCREG = 32,
NUM_MIPSREG = TOTAL_MAPPABLE_MIPSREGS,
};
RegPPC ar[NUM_PPCREG];   // indexed by PPC register number
RegMIPS mr[NUM_MIPSREG];
};

View File

@@ -136,23 +136,23 @@ void WriteUnchecked_U32(const u32 _Data, const u32 _Address);
#else
inline u32 ReadUnchecked_U32(const u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
return (*(u32 *)(base + (address & MEMVIEW32_MASK)));
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
return LE_32(*(u32 *)(base + (address & MEMVIEW32_MASK)));
#else
return (*(u32 *)(base + address));
return LE_32(*(u32 *)(base + address));
#endif
}
inline u16 ReadUnchecked_U16(const u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
return (*(u16 *)(base + (address & MEMVIEW32_MASK)));
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
return LE_16(*(u16 *)(base + (address & MEMVIEW32_MASK)));
#else
return (*(u16 *)(base + address));
return LE_16(*(u16 *)(base + address));
#endif
}
inline u8 ReadUnchecked_U8(const u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
return (*(u8 *)(base + (address & MEMVIEW32_MASK)));
#else
return (*(u8 *)(base + address));
@@ -160,23 +160,23 @@ inline u8 ReadUnchecked_U8(const u32 address) {
}
inline void WriteUnchecked_U32(u32 data, u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
(*(u32 *)(base + (address & MEMVIEW32_MASK))) = data;
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
(*(u32 *)(base + (address & MEMVIEW32_MASK))) = LE_32(data);
#else
(*(u32 *)(base + address)) = data;
(*(u32 *)(base + address)) = LE_32(data);
#endif
}
inline void WriteUnchecked_U16(u16 data, u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
(*(u16 *)(base + (address & MEMVIEW32_MASK))) = data;
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
(*(u16 *)(base + (address & MEMVIEW32_MASK))) = LE_16(data);
#else
(*(u16 *)(base + address)) = data;
(*(u16 *)(base + address)) = LE_16(data);
#endif
}
inline void WriteUnchecked_U8(u8 data, u32 address) {
#if defined(_M_IX86) || defined(_M_ARM32)
#if defined(_M_IX86) || defined(_M_ARM32) || defined (_XBOX)
(*(u8 *)(base + (address & MEMVIEW32_MASK))) = data;
#else
(*(u8 *)(base + address)) = data;

View File

@@ -158,21 +158,21 @@ u16 Read_U16(const u32 _Address)
{
u16 _var = 0;
ReadFromHardware<u16>(_var, _Address);
return (u16)_var;
return (u16)LE_16(_var);
}
u32 Read_U32(const u32 _Address)
{
u32 _var = 0;
ReadFromHardware<u32>(_var, _Address);
return _var;
return LE_32(_var);
}
u64 Read_U64(const u32 _Address)
{
u64 _var = 0;
ReadFromHardware<u64>(_var, _Address);
return _var;
return LE_64(_var);
}
u32 Read_U8_ZX(const u32 _Address)
@@ -193,17 +193,17 @@ void Write_U8(const u8 _Data, const u32 _Address)
void Write_U16(const u16 _Data, const u32 _Address)
{
WriteToHardware<u16>(_Address, _Data);
WriteToHardware<u16>(_Address, LE_16(_Data));
}
void Write_U32(const u32 _Data, const u32 _Address)
{
WriteToHardware<u32>(_Address, _Data);
WriteToHardware<u32>(_Address, LE_32(_Data));
}
void Write_U64(const u64 _Data, const u32 _Address)
{
WriteToHardware<u64>(_Address, _Data);
WriteToHardware<u64>(_Address, LE_64(_Data));
}
#ifdef SAFE_MEMORY
@@ -219,14 +219,14 @@ u16 ReadUnchecked_U16(const u32 _Address)
{
u16 _var = 0;
ReadFromHardware<u16>(_var, _Address);
return _var;
return LE_16(_var);
}
u32 ReadUnchecked_U32(const u32 _Address)
{
u32 _var = 0;
ReadFromHardware<u32>(_var, _Address);
return _var;
return LE_32(_var);
}
void WriteUnchecked_U8(const u8 _iValue, const u32 _Address)
@@ -236,12 +236,12 @@ void WriteUnchecked_U8(const u8 _iValue, const u32 _Address)
void WriteUnchecked_U16(const u16 _iValue, const u32 _Address)
{
WriteToHardware<u16>(_Address, _iValue);
WriteToHardware<u16>(_Address, LE_16(_iValue));
}
void WriteUnchecked_U32(const u32 _iValue, const u32 _Address)
{
WriteToHardware<u32>(_Address, _iValue);
WriteToHardware<u32>(_Address, LE_32(_iValue));
}
#endif
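Since both the read and the write paths now apply the LE_* helpers, guest RAM stays little-endian regardless of host endianness, and a write/read round trip is value-preserving. A quick illustrative check (assuming these functions live in the Memory namespace as elsewhere in the codebase; the address is an arbitrary user-memory location, not from this patch):
#include "Core/MemMap.h"

static bool SketchEndianRoundTrip() {
	const u32 addr = 0x08800000;              // illustrative PSP user-memory address
	Memory::Write_U32(0x12345678, addr);      // stored as 78 56 34 12 in guest RAM
	return Memory::Read_U32(addr) == 0x12345678;
}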