Icache must be invalidated. Jit now starts to run, but there's no cube in cube.elf!

This commit is contained in:
Henrik Rydgard 2013-01-08 23:52:11 +01:00
parent 8915677241
commit 76481a300c
12 changed files with 62 additions and 294 deletions

View File

@ -59,7 +59,7 @@ const u8 *ARMXEmitter::AlignCodePage()
return code;
}
void ARMXEmitter::Flush()
void ARMXEmitter::FlushIcache()
{
__builtin___clear_cache (startcode, code);
SLEEP(0);
@ -166,7 +166,7 @@ void ARMXEmitter::B (const void *fnptr)
void ARMXEmitter::B(ARMReg src)
{
Write32(condition | (18 << 20) | (0xFFF << 8) | (1 << 4) | src);
Write32(condition | 0x12FFF10 | src);
}
void ARMXEmitter::BL(const void *fnptr)
@ -536,20 +536,4 @@ void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
}
}
// helper routines for setting pointers
// ARM stub of the 3-argument cdecl call helper.
// The body is empty: fnptr and arg0..arg2 are ignored and nothing is emitted.
void ARMXEmitter::CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2)
{
}
// ARM stub of the 4-argument cdecl call helper.
// The body is empty: fnptr and arg0..arg3 are ignored and nothing is emitted.
void ARMXEmitter::CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3)
{
}
// ARM stub of the 5-argument cdecl call helper.
// The body is empty: fnptr and arg0..arg4 are ignored and nothing is emitted.
void ARMXEmitter::CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4)
{
}
// ARM stub of the 6-argument cdecl call helper.
// The body is empty: fnptr and arg0..arg5 are ignored and nothing is emitted.
void ARMXEmitter::CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
{
}
}
}

View File

@ -204,17 +204,17 @@ public:
{
switch(Type)
{
case TYPE_IMM:
case TYPE_IMM:
return Imm12Mod(); // This'll need to be changed later
case TYPE_REG:
case TYPE_REG:
return Rm();
case TYPE_IMMSREG:
case TYPE_IMMSREG:
return IMMSR();
case TYPE_RSR:
case TYPE_RSR:
return RSR();
default:
_assert_msg_(DYNA_REC, false, "GetData with Invalid Type");
break;
default:
_assert_msg_(DYNA_REC, false, "GetData with Invalid Type");
return 0;
}
}
const u32 IMMSR() // IMM shifted register
@ -344,7 +344,7 @@ public:
const u8 *AlignCode16();
const u8 *AlignCodePage();
const u8 *GetCodePtr() const;
void Flush();
void FlushIcache();
u8 *GetWritableCodePtr();
void SetCC(CCFlags cond = CC_AL);
@ -463,8 +463,6 @@ public:
void VMOV(ARMReg Dest, ARMReg Src);
// Utility functions
// The difference between this and CALL is that this aligns the stack
// where appropriate.
void ARMABI_CallFunction(void *func);
void ARMABI_CallFunctionC(void *func, u32 Arg0);
void ARMABI_CallFunctionCNoSave(void *func, u32 Arg0);
@ -480,19 +478,6 @@ public:
void UpdateAPSR(bool NZCVQ, u8 Flags, bool GE, u8 GEval);
// Strange call wrappers.
void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2);
void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5);
#define CallCdeclFunction3_I(a,b,c,d) CallCdeclFunction3((void *)(a), (b), (c), (d))
#define CallCdeclFunction4_I(a,b,c,d,e) CallCdeclFunction4((void *)(a), (b), (c), (d), (e))
#define CallCdeclFunction5_I(a,b,c,d,e,f) CallCdeclFunction5((void *)(a), (b), (c), (d), (e), (f))
#define CallCdeclFunction6_I(a,b,c,d,e,f,g) CallCdeclFunction6((void *)(a), (b), (c), (d), (e), (f), (g))
#define DECLARE_IMPORT(x)
}; // class ARMXEmitter

View File

@ -243,13 +243,13 @@ u8* MemArena::Find4GBBase()
// We are unable to use relative addresses due to lack of mmap()
return NULL;
#else
void* base = mmap(0, 0x5000000, PROT_READ | PROT_WRITE,
void* base = mmap(0, 0x10000000, PROT_READ | PROT_WRITE,
MAP_ANON | MAP_SHARED, -1, 0);
if (base == MAP_FAILED) {
PanicAlert("Failed to map 100 MB of memory space: %s", strerror(errno));
PanicAlert("Failed to map 256 MB of memory space: %s", strerror(errno));
return 0;
}
munmap(base, 0x5000000);
munmap(base, 0x10000000);
return static_cast<u8*>(base);
#endif
#endif

View File

@ -1465,184 +1465,6 @@ void XEmitter::FWAIT()
}
void XEmitter::RTDSC() { Write8(0x0F); Write8(0x31); }
// helper routines for setting pointers
// Emits a call to fnptr with three 32-bit immediate arguments.
//
// Argument placement depends on the build target:
//  - x64 with MSVC:   arg0..arg2 are loaded into RCX, RDX, R8.
//  - x64 otherwise:   arg0..arg2 are loaded into RDI, RSI, RDX.
//  - 32-bit:          the arguments are pushed in reverse order (arg2 first)
//                     after ABI_AlignStack; ABI_RestoreStack is only emitted
//                     on non-Windows builds.
//
// fnptr: address of the function to call.
// arg0..arg2: immediate values passed as the call's arguments.
void XEmitter::CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2)
{
	using namespace Gen;
#ifdef _M_X64
#ifdef _MSC_VER
	MOV(32, R(RCX), Imm32(arg0));
	MOV(32, R(RDX), Imm32(arg1));
	MOV(32, R(R8), Imm32(arg2));
	CALL(fnptr);
#else
	MOV(32, R(RDI), Imm32(arg0));
	MOV(32, R(RSI), Imm32(arg1));
	MOV(32, R(RDX), Imm32(arg2));
	CALL(fnptr);
#endif
#else
	// 3 arguments * 4 bytes each.
	ABI_AlignStack(3 * 4);
	PUSH(32, Imm32(arg2));
	PUSH(32, Imm32(arg1));
	PUSH(32, Imm32(arg0));
	CALL(fnptr);
#ifdef _WIN32
	// don't inc stack
#else
	ABI_RestoreStack(3 * 4);
#endif
#endif
}
// Emits a call to fnptr with four 32-bit immediate arguments.
//  - x64 with MSVC: arg0..arg3 are loaded into RCX, RDX, R8, R9.
//  - x64 otherwise: arg0..arg3 are loaded into RDI, RSI, RDX, RCX.
//  - 32-bit: the arguments are pushed in reverse order (arg3 first) after
//    ABI_AlignStack; ABI_RestoreStack is only emitted on non-Windows builds.
void XEmitter::CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3)
{
using namespace Gen;
#ifdef _M_X64
#ifdef _MSC_VER
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
MOV(32, R(R9), Imm32(arg3));
CALL(fnptr);
#else
MOV(32, R(RDI), Imm32(arg0));
MOV(32, R(RSI), Imm32(arg1));
MOV(32, R(RDX), Imm32(arg2));
MOV(32, R(RCX), Imm32(arg3));
CALL(fnptr);
#endif
#else
// 4 arguments * 4 bytes each.
ABI_AlignStack(4 * 4);
PUSH(32, Imm32(arg3));
PUSH(32, Imm32(arg2));
PUSH(32, Imm32(arg1));
PUSH(32, Imm32(arg0));
CALL(fnptr);
#ifdef _WIN32
// don't inc stack
#else
ABI_RestoreStack(4 * 4);
#endif
#endif
}
// Emits a call to fnptr with five 32-bit immediate arguments.
//  - x64 with MSVC: arg0..arg3 go in RCX, RDX, R8, R9; arg4 is stored to
//    memory at RSP + 0x20.
//  - x64 otherwise: arg0..arg4 go in RDI, RSI, RDX, RCX, R8.
//  - 32-bit: the arguments are pushed in reverse order (arg4 first) after
//    ABI_AlignStack; ABI_RestoreStack is only emitted on non-Windows builds.
void XEmitter::CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4)
{
using namespace Gen;
#ifdef _M_X64
#ifdef _MSC_VER
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
MOV(32, R(R9), Imm32(arg3));
// NOTE(review): presumably the first stack-argument slot above the 32-byte
// Win64 shadow space; this path does not adjust RSP, so it assumes that
// stack space has already been reserved - confirm.
MOV(32, MDisp(RSP, 0x20), Imm32(arg4));
CALL(fnptr);
#else
MOV(32, R(RDI), Imm32(arg0));
MOV(32, R(RSI), Imm32(arg1));
MOV(32, R(RDX), Imm32(arg2));
MOV(32, R(RCX), Imm32(arg3));
MOV(32, R(R8), Imm32(arg4));
CALL(fnptr);
#endif
#else
// 5 arguments * 4 bytes each.
ABI_AlignStack(5 * 4);
PUSH(32, Imm32(arg4));
PUSH(32, Imm32(arg3));
PUSH(32, Imm32(arg2));
PUSH(32, Imm32(arg1));
PUSH(32, Imm32(arg0));
CALL(fnptr);
#ifdef _WIN32
// don't inc stack
#else
ABI_RestoreStack(5 * 4);
#endif
#endif
}
// Emits a call to fnptr with six 32-bit immediate arguments.
//  - x64 with MSVC: arg0..arg3 go in RCX, RDX, R8, R9; arg4 and arg5 are
//    stored to memory at RSP + 0x20 and RSP + 0x28.
//  - x64 otherwise: arg0..arg5 go in RDI, RSI, RDX, RCX, R8, R9.
//  - 32-bit: the arguments are pushed in reverse order (arg5 first) after
//    ABI_AlignStack; ABI_RestoreStack is only emitted on non-Windows builds.
void XEmitter::CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5)
{
using namespace Gen;
#ifdef _M_X64
#ifdef _MSC_VER
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
MOV(32, R(R9), Imm32(arg3));
// NOTE(review): presumably the first two stack-argument slots above the
// 32-byte Win64 shadow space; this path does not adjust RSP, so it assumes
// that stack space has already been reserved - confirm.
MOV(32, MDisp(RSP, 0x20), Imm32(arg4));
MOV(32, MDisp(RSP, 0x28), Imm32(arg5));
CALL(fnptr);
#else
MOV(32, R(RDI), Imm32(arg0));
MOV(32, R(RSI), Imm32(arg1));
MOV(32, R(RDX), Imm32(arg2));
MOV(32, R(RCX), Imm32(arg3));
MOV(32, R(R8), Imm32(arg4));
MOV(32, R(R9), Imm32(arg5));
CALL(fnptr);
#endif
#else
// 6 arguments * 4 bytes each.
ABI_AlignStack(6 * 4);
PUSH(32, Imm32(arg5));
PUSH(32, Imm32(arg4));
PUSH(32, Imm32(arg3));
PUSH(32, Imm32(arg2));
PUSH(32, Imm32(arg1));
PUSH(32, Imm32(arg0));
CALL(fnptr);
#ifdef _WIN32
// don't inc stack
#else
ABI_RestoreStack(6 * 4);
#endif
#endif
}
#ifdef _M_X64
// See header
// Loads arg0..arg2 into RCX, RDX, R8, then emits an indirect call through the
// memory operand M(impptr), i.e. impptr is the location holding the target
// address rather than the target itself.
// Unlike CallCdeclFunction3 there is no per-compiler branch: the same
// registers are used on every x64 build.
void XEmitter::___CallCdeclImport3(void* impptr, u32 arg0, u32 arg1, u32 arg2) {
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
CALLptr(M(impptr));
}
// Loads arg0..arg3 into RCX, RDX, R8, R9, then emits an indirect call through
// the memory operand M(impptr), i.e. impptr is the location holding the
// target address rather than the target itself.
void XEmitter::___CallCdeclImport4(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3) {
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
MOV(32, R(R9), Imm32(arg3));
CALLptr(M(impptr));
}
// Loads arg0..arg3 into RCX, RDX, R8, R9, stores arg4 to memory at RSP + 0x20,
// then emits an indirect call through the memory operand M(impptr).
// NOTE(review): RSP is not adjusted here, so the stack slot at RSP + 0x20 is
// assumed to be reserved already - confirm.
void XEmitter::___CallCdeclImport5(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4) {
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
MOV(32, R(R9), Imm32(arg3));
MOV(32, MDisp(RSP, 0x20), Imm32(arg4));
CALLptr(M(impptr));
}
// Loads arg0..arg3 into RCX, RDX, R8, R9, stores arg4 and arg5 to memory at
// RSP + 0x20 and RSP + 0x28, then emits an indirect call through the memory
// operand M(impptr).
// NOTE(review): RSP is not adjusted here, so the stack slots at RSP + 0x20 and
// RSP + 0x28 are assumed to be reserved already - confirm.
void XEmitter::___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5) {
MOV(32, R(RCX), Imm32(arg0));
MOV(32, R(RDX), Imm32(arg1));
MOV(32, R(R8), Imm32(arg2));
MOV(32, R(R9), Imm32(arg3));
MOV(32, MDisp(RSP, 0x20), Imm32(arg4));
MOV(32, MDisp(RSP, 0x28), Imm32(arg5));
CALLptr(M(impptr));
}
#endif
}

View File

@ -687,43 +687,6 @@ public:
#else
inline int ABI_GetNumXMMRegs() { return 16; }
#endif
// Strange call wrappers.
void CallCdeclFunction3(void* fnptr, u32 arg0, u32 arg1, u32 arg2);
void CallCdeclFunction4(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
void CallCdeclFunction5(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
void CallCdeclFunction6(void* fnptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5);
#if defined(_M_IX86)
#define CallCdeclFunction3_I(a,b,c,d) CallCdeclFunction3((void *)(a), (b), (c), (d))
#define CallCdeclFunction4_I(a,b,c,d,e) CallCdeclFunction4((void *)(a), (b), (c), (d), (e))
#define CallCdeclFunction5_I(a,b,c,d,e,f) CallCdeclFunction5((void *)(a), (b), (c), (d), (e), (f))
#define CallCdeclFunction6_I(a,b,c,d,e,f,g) CallCdeclFunction6((void *)(a), (b), (c), (d), (e), (f), (g))
#define DECLARE_IMPORT(x)
#else
// Comments from VertexLoader.cpp about these horrors:
// This is a horrible hack that is necessary in 64-bit mode because Opengl32.dll is based way, way above the 32-bit
// address space that is within reach of a CALL, and just doing &fn gives us these high uncallable addresses. So we
// want to grab the function pointers from the import table instead.
void ___CallCdeclImport3(void* impptr, u32 arg0, u32 arg1, u32 arg2);
void ___CallCdeclImport4(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3);
void ___CallCdeclImport5(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4);
void ___CallCdeclImport6(void* impptr, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4, u32 arg5);
#define CallCdeclFunction3_I(a,b,c,d) ___CallCdeclImport3(&__imp_##a,b,c,d)
#define CallCdeclFunction4_I(a,b,c,d,e) ___CallCdeclImport4(&__imp_##a,b,c,d,e)
#define CallCdeclFunction5_I(a,b,c,d,e,f) ___CallCdeclImport5(&__imp_##a,b,c,d,e,f)
#define CallCdeclFunction6_I(a,b,c,d,e,f,g) ___CallCdeclImport6(&__imp_##a,b,c,d,e,f,g)
#define DECLARE_IMPORT(x) extern "C" void *__imp_##x
#endif
}; // class XEmitter

View File

@ -462,7 +462,6 @@ void MoveEvents()
void Advance()
{
WARN_LOG(HLE, "ADVANCE!");
int cyclesExecuted = slicelength - downcount;
globalTimer += cyclesExecuted;
downcount = slicelength;

View File

@ -65,9 +65,9 @@ void Jit()
MIPSComp::jit->Compile(currentMIPS->pc);
}
void ShowPC() {
void ShowPC(u32 sp) {
if (currentMIPS) {
WARN_LOG(HLE, "PC : %08x", currentMIPS->pc);
WARN_LOG(HLE, "PC : %08x ArmSP : %08x", currentMIPS->pc, sp);
} else {
ERROR_LOG(HLE, "Universe corrupt?");
}
@ -79,6 +79,13 @@ void DisassembleArm(const u8 *data, int size);
// dynarec buffer
// At this offset - 4, there is an int specifying the block number.
// Emits a call to func that goes through the register reg.
//
// reg:  register that receives the address of func; its previous contents
//       are overwritten.
// func: function to call. Its address is truncated to u32 by the cast below,
//       so this assumes 32-bit pointers.
//
// LR is pushed before the call and popped afterwards, so the link register
// value seen by the surrounding generated code is unchanged by the BL.
void ArmAsmRoutineManager::QuickCallFunction(ARMReg reg, void *func) {
PUSH(1, _LR);
// NOTE(review): presumably loads the 32-bit immediate into reg - confirm
// against ARMABI_MOVI2R.
ARMABI_MOVI2R(reg, (u32)(func));
BL(reg);
POP(1, _LR);
}
void ArmAsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
{
enterCode = AlignCode16();
@ -93,10 +100,10 @@ void ArmAsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
// R13 cannot be used as it's the stack pointer.
ARMABI_MOVI2R(R11, (u32)Memory::base);
ARMABI_MOVI2R(R10, (u32)mips);
ARMABI_MOVI2R(R9, (u32)jit->GetBlockCache()->GetCodePointers());
ARMABI_MOVI2R(R7, (u32)jit->GetBlockCache()->GetCodePointers());
outerLoop = GetCodePtr();
ARMABI_CallFunction((void *)&CoreTiming::Advance);
QuickCallFunction(R0, (void *)&CoreTiming::Advance);
FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
dispatcherCheckCoreState = GetCodePtr();
@ -106,9 +113,9 @@ void ArmAsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
LDR(R0, R0);
CMP(R0, 0);
FixupBranch badCoreState = B_CC(CC_NEQ);
// At this point : flags = EQ. Fine for the next check, no need to jump over it.
FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time
// At this point : flags = EQ. Fine for the next check, no need to jump over it.
dispatcher = GetCodePtr();
// The result of slice decrementation should be in flags if somebody jumped here
@ -116,11 +123,17 @@ void ArmAsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
FixupBranch bail = B_CC(CC_MI);
SetJumpTarget(skipToRealDispatch);
SetJumpTarget(skipToRealDispatch2);
dispatcherNoCheck = GetCodePtr();
// Debug
// ARMABI_CallFunction((void *)&ShowPC);
// MOV(R0, R13);
// QuickCallFunction(R1, (void *)&ShowPC);
ARMABI_MOVI2R(R7, (u32)jit->GetBlockCache()->GetCodePointers());
ARMABI_MOVI2R(R11, (u32)Memory::base);
ARMABI_MOVI2R(R10, (u32)mips);
LDR(R0, R10, offsetof(MIPSState, pc));
@ -138,7 +151,7 @@ void ArmAsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
//ADD(32, M(&mips->debugCount), Imm8(1));
}
// grab from list and jump to it
ADD(R0, R9, Operand2(2, ST_LSL, R0));
ADD(R0, R7, Operand2(2, ST_LSL, R0));
LDR(R0, R0);
B(R0);
@ -166,4 +179,7 @@ void ArmAsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
INFO_LOG(HLE, "THE DISASM ========================");
DisassembleArm(enterCode, GetCodePtr() - enterCode);
INFO_LOG(HLE, "END OF THE DISASM ========================");
// Don't forget to zap the instruction cache!
FlushIcache();
}

View File

@ -30,10 +30,6 @@ namespace MIPSComp
class ArmAsmRoutineManager : public ArmGen::ARMXCodeBlock
{
private:
void Generate(MIPSState *mips, MIPSComp::Jit *jit);
void GenerateCommon();
public:
ArmAsmRoutineManager()
{
@ -58,6 +54,10 @@ public:
const u8 *dispatcherNoCheck;
const u8 *breakpointBailout;
private:
void Generate(MIPSState *mips, MIPSComp::Jit *jit);
void QuickCallFunction(ArmGen::ARMReg reg, void *func);
};
#endif // _JIT64ASM_H

View File

@ -62,20 +62,20 @@ void Jit::BranchRSRTComp(u32 op, ArmGen::CCFlags cc, bool likely)
//ERROR_LOG(CPU, "Not nice delay slot in BranchRSRTComp :( %08x", js.compilerPC);
}
// The delay slot being nice doesn't really matter though...
/*
if (rt == 0)
{
gpr.MapReg(rs, MAP_INITVAL);
CMP(gpr.R(rs), Operand2(0));
}*/
}
/*
else if (rs == 0 && (cc == CC_EQ || cc == CC_NEQ)) // only these are easily 'flippable'
{
gpr.MapReg(rt, MAP_INITVAL);
CMP(gpr.R(rt), Operand2(0));
}
else*/
{
*/
else {
gpr.SpillLock(rs, rt);
gpr.MapReg(rs, MAP_INITVAL);
gpr.MapReg(rt, MAP_INITVAL);
@ -227,9 +227,8 @@ void Jit::BranchFPFlag(u32 op, ArmGen::CCFlags cc, bool likely)
}
FlushAll();
ARMABI_MOVI2R(R0, (u32)&(mips_->fpcond));
LDR(R0, R0, Operand2(0, TYPE_IMM));
TST(R0, Operand2(1, TYPE_IMM));
LDR(R0, R10, offsetof(MIPSState, fpcond));
CMP(R0, Operand2(1, TYPE_IMM));
ArmGen::FixupBranch ptr;
js.inDelaySlot = true;
if (!likely)
@ -263,10 +262,10 @@ void Jit::Comp_FPUBranch(u32 op)
{
switch((op >> 16) & 0x1f)
{
case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f
case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t
case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl
case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl
case 0: BranchFPFlag(op, CC_EQ, false); break; // bc1f
case 1: BranchFPFlag(op, CC_NEQ, false); break; // bc1t
case 2: BranchFPFlag(op, CC_EQ, true); break; // bc1fl
case 3: BranchFPFlag(op, CC_NEQ, true); break; // bc1tl
default:
_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
break;

View File

@ -36,7 +36,7 @@ void DisassembleArm(const u8 *data, int size) {
// MAGIC SPECIAL CASE for MOVW/MOVT readability!
if ((inst & 0x0FF00000) == 0x03000000 && (next & 0x0FF00000) == 0x03400000) {
u32 low = ((inst & 0x000F0000) >> 4) | (inst & 0x0FFF);
u32 hi = ((next & 0x000F0000) >> 4) | (next & 0x0FFF);
u32 hi = ((next & 0x000F0000) >> 4) | (next & 0x0FFF);
int reg0 = (inst & 0x0000F000) >> 12;
int reg1 = (next & 0x0000F000) >> 12;
if (reg0 == reg1) {
@ -76,8 +76,6 @@ void Jit::ClearCache()
ClearCodeSpace();
}
u8 *codeCache;
#define CACHESIZE 16384*1024
void Jit::CompileAt(u32 addr)
{
u32 op = Memory::Read_Instruction(addr);
@ -86,7 +84,7 @@ void Jit::CompileAt(u32 addr)
void Jit::Compile(u32 em_address)
{
//ERROR_LOG(CPU, "Compile %08x", em_address);
ERROR_LOG(CPU, "Compile %08x", em_address);
if (GetSpaceLeft() < 0x10000 || blocks.IsFull())
{
ClearCache();
@ -105,10 +103,6 @@ void Jit::RunLoopUntil(u64 globalticks)
INFO_LOG(DYNA_REC, "or Two!");
}
// Debug helper: logs its four integer arguments as zero-padded 8-digit hex
// values on the DYNA_REC log channel at INFO level.
void Hullo(int a, int b, int c, int d) {
INFO_LOG(DYNA_REC, "Hullo %08x %08x %08x %08x", a, b, c, d);
}
const u8 *Jit::DoJit(u32 em_address, ArmJitBlock *b)
{
js.cancel = false;
@ -165,6 +159,10 @@ const u8 *Jit::DoJit(u32 em_address, ArmJitBlock *b)
DisassembleArm(b->checkedEntry, GetCodePtr() - b->checkedEntry);
#endif
AlignCode16();
// Don't forget to zap the instruction cache!
FlushIcache();
b->originalSize = numInstructions;
return b->normalEntry;
}

View File

@ -50,7 +50,7 @@ static const ARMReg *GetMIPSAllocationOrder(int &count) {
// R9 and upwards are reserved for jit basics.
// Six allocated registers should be enough...
static const ARMReg allocationOrder[] = {
R2, R3, R4, R5, R6, R7
R2, R3, R4, R5, R6 //, R7
};
count = sizeof(allocationOrder) / sizeof(const int);
return allocationOrder;

View File

@ -83,6 +83,7 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
CMP(32, M((void*)&coreState), Imm32(0));
FixupBranch badCoreState = J_CC(CC_NZ, true);
FixupBranch skipToRealDispatch2 = J(); //skip the sync and compare first time
dispatcher = GetCodePtr();
// The result of slice decrementation should be in flags if somebody jumped here
@ -90,6 +91,7 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
FixupBranch bail = J_CC(CC_S, true);
SetJumpTarget(skipToRealDispatch);
SetJumpTarget(skipToRealDispatch2);
dispatcherNoCheck = GetCodePtr();