From 76ca42b758645978788760d01c2482c1c3d47a40 Mon Sep 17 00:00:00 2001 From: Fiora Date: Sun, 14 Dec 2014 16:17:28 -0800 Subject: [PATCH] JIT: free up a register by eliminating RCODE_POINTERS Also use shorter code in the dispatcher if we can get away with it (e.g. on Windows where the relevant memory is in the low 2GB). --- Source/Core/Core/PowerPC/Jit64/JitAsm.cpp | 52 +++++++++++++++---- .../Core/Core/PowerPC/Jit64/JitRegCache.cpp | 4 +- Source/Core/Core/PowerPC/JitCommon/JitBase.h | 2 - 3 files changed, 45 insertions(+), 13 deletions(-) diff --git a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp index fdaaeb50c4..28c2b3fa97 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitAsm.cpp @@ -39,7 +39,6 @@ void Jit64AsmRoutineManager::Generate() // Two statically allocated registers. MOV(64, R(RMEM), Imm64((u64)Memory::base)); - MOV(64, R(RCODE_POINTERS), Imm64((u64)jit->GetBlockCache()->GetCodePointers())); //It's below 2GB so 32 bits are good enough MOV(64, R(RPPCSTATE), Imm64((u64)&PowerPC::ppcState + 0x80)); const u8* outerLoop = GetCodePtr(); @@ -85,6 +84,9 @@ void Jit64AsmRoutineManager::Generate() dispatcherNoCheck = GetCodePtr(); MOV(32, R(RSCRATCH), PPCSTATE(pc)); + u64 icache = (u64)jit->GetBlockCache()->iCache.data(); + u64 icacheVmem = (u64)jit->GetBlockCache()->iCacheVMEM.data(); + u64 icacheEx = (u64)jit->GetBlockCache()->iCacheEx.data(); u32 mask = 0; FixupBranch no_mem; FixupBranch exit_mem; @@ -95,16 +97,31 @@ void Jit64AsmRoutineManager::Generate() TEST(32, R(RSCRATCH), Imm32(mask)); no_mem = J_CC(CC_NZ); AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK)); - MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCache.data())); - MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); + + if (icache <= INT_MAX) + { + MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icache)); + } + else + { + MOV(64, R(RSCRATCH2), Imm64(icache)); + MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); + } exit_mem = J(); SetJumpTarget(no_mem); TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_VMEM_BIT)); FixupBranch no_vmem = J_CC(CC_Z); AND(32, R(RSCRATCH), Imm32(JIT_ICACHE_MASK)); - MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheVMEM.data())); - MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); + if (icacheVmem <= INT_MAX) + { + MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheVmem)); + } + else + { + MOV(64, R(RSCRATCH2), Imm64(icacheVmem)); + MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); + } if (SConfig::GetInstance().m_LocalCoreStartupParameter.bWii) exit_vmem = J(); SetJumpTarget(no_vmem); @@ -113,8 +130,16 @@ void Jit64AsmRoutineManager::Generate() TEST(32, R(RSCRATCH), Imm32(JIT_ICACHE_EXRAM_BIT)); FixupBranch no_exram = J_CC(CC_Z); AND(32, R(RSCRATCH), Imm32(JIT_ICACHEEX_MASK)); - MOV(64, R(RSCRATCH2), Imm64((u64)jit->GetBlockCache()->iCacheEx.data())); - MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); + + if (icacheEx <= INT_MAX) + { + MOV(32, R(RSCRATCH), MDisp(RSCRATCH, (s32)icacheEx)); + } + else + { + MOV(64, R(RSCRATCH2), Imm64(icacheEx)); + MOV(32, R(RSCRATCH), MComplex(RSCRATCH2, RSCRATCH, SCALE_1, 0)); + } SetJumpTarget(no_exram); } @@ -124,8 +149,17 @@ void Jit64AsmRoutineManager::Generate() TEST(32, R(RSCRATCH), R(RSCRATCH)); FixupBranch notfound = J_CC(CC_L); - //grab from list and jump to it - JMPptr(MComplex(RCODE_POINTERS, RSCRATCH, 8, 0)); + //grab from list and jump to it + u64 codePointers = (u64)jit->GetBlockCache()->GetCodePointers(); + if (codePointers <= INT_MAX) + { + JMPptr(MScaled(RSCRATCH, 8, (s32)codePointers)); + } + else + { + MOV(64, R(RSCRATCH2), Imm64(codePointers)); + JMPptr(MComplex(RSCRATCH2, RSCRATCH, 8, 0)); + } SetJumpTarget(notfound); //Ok, no block, let's jit diff --git a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp index c7b0dd1db4..f91694ba9e 100644 --- a/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp +++ b/Source/Core/Core/PowerPC/Jit64/JitRegCache.cpp @@ -267,9 +267,9 @@ const int* GPRRegCache::GetAllocationOrder(size_t& count) { // R12, when used as base register, for example in a LEA, can generate bad code! Need to look into this. #ifdef _WIN32 - RSI, RDI, R13, R14, R8, R9, R10, R11, R12, RCX + RSI, RDI, R13, R14, R15, R8, R9, R10, R11, R12, RCX #else - R12, R13, R14, RSI, RDI, R8, R9, R10, R11, RCX + R12, R13, R14, R15, RSI, RDI, R8, R9, R10, R11, RCX #endif }; count = sizeof(allocationOrder) / sizeof(const int); diff --git a/Source/Core/Core/PowerPC/JitCommon/JitBase.h b/Source/Core/Core/PowerPC/JitCommon/JitBase.h index 17d676e864..8af315a7fa 100644 --- a/Source/Core/Core/PowerPC/JitCommon/JitBase.h +++ b/Source/Core/Core/PowerPC/JitCommon/JitBase.h @@ -39,8 +39,6 @@ #define RSCRATCH_EXTRA RCX // RMEM points to the start of emulated memory. #define RMEM RBX -// RCODE_POINTERS does what it says. -#define RCODE_POINTERS R15 // RPPCSTATE points to ppcState + 0x80. It's offset because we want to be able // to address as much as possible in a one-byte offset form. #define RPPCSTATE RBP