mirror of
https://github.com/libretro/pcsx2.git
synced 2025-01-05 00:48:56 +00:00
Implemented an aligned stack for microVU and superVU. (microVU's stack alignment is currently #ifdef'd for GCC only, because implementing an aligned stack for compilers that do not already assume one would add complexity and overhead.)
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@2072 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
984e79ae03
commit
b96ab5621f
@ -16,6 +16,13 @@
|
|||||||
// Micro VU recompiler! - author: cottonvibes(@gmail.com)
|
// Micro VU recompiler! - author: cottonvibes(@gmail.com)
|
||||||
|
|
||||||
#include "PrecompiledHeader.h"
|
#include "PrecompiledHeader.h"
|
||||||
|
#include "Common.h"
|
||||||
|
#include "VU.h"
|
||||||
|
#include "GS.h"
|
||||||
|
#include "x86emitter/x86emitter.h"
|
||||||
|
|
||||||
|
using namespace x86Emitter;
|
||||||
|
|
||||||
#include "microVU.h"
|
#include "microVU.h"
|
||||||
|
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
|
@ -17,10 +17,6 @@
|
|||||||
//#define mVUdebug // Prints Extra Info to Console
|
//#define mVUdebug // Prints Extra Info to Console
|
||||||
//#define mVUlogProg // Dumps MicroPrograms to \logs\*.html
|
//#define mVUlogProg // Dumps MicroPrograms to \logs\*.html
|
||||||
|
|
||||||
#include "Common.h"
|
|
||||||
#include "VU.h"
|
|
||||||
#include "GS.h"
|
|
||||||
#include "x86emitter/x86emitter.h"
|
|
||||||
#include "microVU_IR.h"
|
#include "microVU_IR.h"
|
||||||
#include "microVU_Misc.h"
|
#include "microVU_Misc.h"
|
||||||
|
|
||||||
|
@ -90,7 +90,6 @@ microVUt(void) mVUsetupBranch(mV, microFlagCycles& mFC) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void normBranchCompile(microVU* mVU, u32 branchPC) {
|
void normBranchCompile(microVU* mVU, u32 branchPC) {
|
||||||
using namespace x86Emitter;
|
|
||||||
microBlock* pBlock;
|
microBlock* pBlock;
|
||||||
blockCreate(branchPC/8);
|
blockCreate(branchPC/8);
|
||||||
pBlock = mVUblocks[branchPC/8]->search((microRegInfo*)&mVUregs);
|
pBlock = mVUblocks[branchPC/8]->search((microRegInfo*)&mVUregs);
|
||||||
@ -99,7 +98,6 @@ void normBranchCompile(microVU* mVU, u32 branchPC) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
|
void normJumpCompile(mV, microFlagCycles& mFC, bool isEvilJump) {
|
||||||
using namespace x86Emitter;
|
|
||||||
memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
|
memcpy_fast(&mVUpBlock->pStateEnd, &mVUregs, sizeof(microRegInfo));
|
||||||
mVUsetupBranch(mVU, mFC);
|
mVUsetupBranch(mVU, mFC);
|
||||||
mVUbackupRegs(mVU);
|
mVUbackupRegs(mVU);
|
||||||
@ -126,7 +124,6 @@ void normBranch(mV, microFlagCycles& mFC) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
|
void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
|
||||||
using namespace x86Emitter;
|
|
||||||
mVUsetupBranch(mVU, mFC);
|
mVUsetupBranch(mVU, mFC);
|
||||||
xCMP(ptr16[&mVU->branch], 0);
|
xCMP(ptr16[&mVU->branch], 0);
|
||||||
incPC(3);
|
incPC(3);
|
||||||
@ -172,8 +169,6 @@ void condBranch(mV, microFlagCycles& mFC, int JMPcc) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void normJump(mV, microFlagCycles& mFC) {
|
void normJump(mV, microFlagCycles& mFC) {
|
||||||
using namespace x86Emitter;
|
|
||||||
|
|
||||||
if (mVUlow.constJump.isValid) { // Jump Address is Constant
|
if (mVUlow.constJump.isValid) { // Jump Address is Constant
|
||||||
if (mVUup.eBit) { // E-bit Jump
|
if (mVUup.eBit) { // E-bit Jump
|
||||||
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);
|
iPC = (mVUlow.constJump.regValue*2)&(mVU->progSize-1);
|
||||||
|
@ -333,7 +333,6 @@ microVUt(void) mVUinitFirstPass(microVU* mVU, uptr pState, u8* thisPtr) {
|
|||||||
|
|
||||||
microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
||||||
|
|
||||||
using namespace x86Emitter;
|
|
||||||
microFlagCycles mFC;
|
microFlagCycles mFC;
|
||||||
u8* thisPtr = x86Ptr;
|
u8* thisPtr = x86Ptr;
|
||||||
const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU->microMemSize / 8);
|
const u32 endCount = (((microRegInfo*)pState)->blockType) ? 1 : (mVU->microMemSize / 8);
|
||||||
@ -414,7 +413,6 @@ microVUr(void*) mVUcompile(microVU* mVU, u32 startPC, uptr pState) {
|
|||||||
// Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr)
|
// Search for Existing Compiled Block (if found, return x86ptr; else, compile and return x86ptr)
|
||||||
microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) {
|
microVUt(void*) mVUblockFetch(microVU* mVU, u32 startPC, uptr pState) {
|
||||||
|
|
||||||
using namespace x86Emitter;
|
|
||||||
if (startPC > mVU->microMemSize-8) { DevCon.Error("microVU%d: invalid startPC [%04x]", mVU->index, startPC); }
|
if (startPC > mVU->microMemSize-8) { DevCon.Error("microVU%d: invalid startPC [%04x]", mVU->index, startPC); }
|
||||||
startPC &= mVU->microMemSize-8;
|
startPC &= mVU->microMemSize-8;
|
||||||
|
|
||||||
|
@ -23,18 +23,23 @@
|
|||||||
void mVUdispatcherA(mV) {
|
void mVUdispatcherA(mV) {
|
||||||
mVU->startFunct = x86Ptr;
|
mVU->startFunct = x86Ptr;
|
||||||
|
|
||||||
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
|
|
||||||
if (!isVU1) { CALLFunc((uptr)mVUexecuteVU0); }
|
|
||||||
else { CALLFunc((uptr)mVUexecuteVU1); }
|
|
||||||
|
|
||||||
// Backup cpu state
|
// Backup cpu state
|
||||||
PUSH32R(EBX);
|
xPUSH(ebp);
|
||||||
PUSH32R(EBP);
|
xPUSH(ebx);
|
||||||
PUSH32R(ESI);
|
xPUSH(esi);
|
||||||
PUSH32R(EDI);
|
xPUSH(edi);
|
||||||
|
|
||||||
|
// Align the stackframe (GCC only, since GCC assumes stackframe is always aligned)
|
||||||
|
#ifdef __GNUC__
|
||||||
|
xSUB(esp, 12);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// __fastcall = The caller has already put the needed parameters in ecx/edx:
|
||||||
|
if (!isVU1) { xCALL(mVUexecuteVU0); }
|
||||||
|
else { xCALL(mVUexecuteVU1); }
|
||||||
|
|
||||||
// Load VU's MXCSR state
|
// Load VU's MXCSR state
|
||||||
SSE_LDMXCSR((uptr)&g_sseVUMXCSR);
|
xLDMXCSR(&g_sseVUMXCSR);
|
||||||
|
|
||||||
// Load Regs
|
// Load Regs
|
||||||
#ifdef CHECK_MACROVU0
|
#ifdef CHECK_MACROVU0
|
||||||
@ -59,7 +64,7 @@ void mVUdispatcherA(mV) {
|
|||||||
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
|
SSE_SHUFPS_XMM_to_XMM(xmmPQ, xmmT1, 0); // wzyx = PPQQ
|
||||||
|
|
||||||
// Jump to Recompiled Code Block
|
// Jump to Recompiled Code Block
|
||||||
JMPR(EAX);
|
xJMP(eax);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generates the code to exit from recompiled blocks
|
// Generates the code to exit from recompiled blocks
|
||||||
@ -67,19 +72,25 @@ void mVUdispatcherB(mV) {
|
|||||||
mVU->exitFunct = x86Ptr;
|
mVU->exitFunct = x86Ptr;
|
||||||
|
|
||||||
// Load EE's MXCSR state
|
// Load EE's MXCSR state
|
||||||
SSE_LDMXCSR((uptr)&g_sseMXCSR);
|
xLDMXCSR(&g_sseMXCSR);
|
||||||
|
|
||||||
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers; all other arguments are passed right to left.
|
// __fastcall = The first two DWORD or smaller arguments are passed in ECX and EDX registers;
|
||||||
if (!isVU1) { CALLFunc((uptr)mVUcleanUpVU0); }
|
// all other arguments are passed right to left.
|
||||||
else { CALLFunc((uptr)mVUcleanUpVU1); }
|
if (!isVU1) { xCALL(mVUcleanUpVU0); }
|
||||||
|
else { xCALL(mVUcleanUpVU1); }
|
||||||
|
|
||||||
|
// Unalign the stackframe:
|
||||||
|
#ifdef __GNUC__
|
||||||
|
xADD( esp, 12 );
|
||||||
|
#endif
|
||||||
|
|
||||||
// Restore cpu state
|
// Restore cpu state
|
||||||
POP32R(EDI);
|
xPOP(edi);
|
||||||
POP32R(ESI);
|
xPOP(esi);
|
||||||
POP32R(EBP);
|
xPOP(ebx);
|
||||||
POP32R(EBX);
|
xPOP(ebp);
|
||||||
|
|
||||||
RET();
|
xRET();
|
||||||
|
|
||||||
mVUcacheCheck(x86Ptr, mVU->cache, 0x1000);
|
mVUcacheCheck(x86Ptr, mVU->cache, 0x1000);
|
||||||
}
|
}
|
||||||
@ -98,7 +109,7 @@ microVUx(void*) __fastcall mVUexecute(u32 startPC, u32 cycles) {
|
|||||||
mVU->cycles = cycles;
|
mVU->cycles = cycles;
|
||||||
mVU->totalCycles = cycles;
|
mVU->totalCycles = cycles;
|
||||||
|
|
||||||
x86SetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
|
xSetPtr(mVU->prog.x86ptr); // Set x86ptr to where last program left off
|
||||||
return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState);
|
return mVUblockFetch(mVU, startPC, (uptr)&mVU->prog.lpState);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1128,7 +1128,7 @@ microVUt(void) mVU_XGKICK_DELAY(mV, bool memVI) {
|
|||||||
mVUbackupRegs(mVU);
|
mVUbackupRegs(mVU);
|
||||||
if (memVI) MOV32MtoR(gprT2, (uptr)&mVU->VIxgkick);
|
if (memVI) MOV32MtoR(gprT2, (uptr)&mVU->VIxgkick);
|
||||||
else mVUallocVIa(mVU, gprT2, _Is_);
|
else mVUallocVIa(mVU, gprT2, _Is_);
|
||||||
CALLFunc((uptr)mVU_XGKICK_);
|
xCALL(mVU_XGKICK_);
|
||||||
mVUrestoreRegs(mVU);
|
mVUrestoreRegs(mVU);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1159,7 +1159,6 @@ void setBranchA(mP, int x, int _x_) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void condEvilBranch(mV, int JMPcc) {
|
void condEvilBranch(mV, int JMPcc) {
|
||||||
using namespace x86Emitter;
|
|
||||||
if (mVUlow.badBranch) {
|
if (mVUlow.badBranch) {
|
||||||
xMOV(ptr32[&mVU->branch], eax);
|
xMOV(ptr32[&mVU->branch], eax);
|
||||||
xMOV(ptr32[&mVU->badBranch], branchAddrN);
|
xMOV(ptr32[&mVU->badBranch], branchAddrN);
|
||||||
@ -1202,7 +1201,6 @@ mVUop(mVU_BAL) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_IBEQ) {
|
mVUop(mVU_IBEQ) {
|
||||||
using namespace x86Emitter;
|
|
||||||
setBranchA(mX, 3, 0);
|
setBranchA(mX, 3, 0);
|
||||||
pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
|
pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
@ -1219,7 +1217,6 @@ mVUop(mVU_IBEQ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_IBGEZ) {
|
mVUop(mVU_IBGEZ) {
|
||||||
using namespace x86Emitter;
|
|
||||||
setBranchA(mX, 4, 0);
|
setBranchA(mX, 4, 0);
|
||||||
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
@ -1232,7 +1229,6 @@ mVUop(mVU_IBGEZ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_IBGTZ) {
|
mVUop(mVU_IBGTZ) {
|
||||||
using namespace x86Emitter;
|
|
||||||
setBranchA(mX, 5, 0);
|
setBranchA(mX, 5, 0);
|
||||||
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
@ -1245,7 +1241,6 @@ mVUop(mVU_IBGTZ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_IBLEZ) {
|
mVUop(mVU_IBLEZ) {
|
||||||
using namespace x86Emitter;
|
|
||||||
setBranchA(mX, 6, 0);
|
setBranchA(mX, 6, 0);
|
||||||
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
@ -1258,7 +1253,6 @@ mVUop(mVU_IBLEZ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_IBLTZ) {
|
mVUop(mVU_IBLTZ) {
|
||||||
using namespace x86Emitter;
|
|
||||||
setBranchA(mX, 7, 0);
|
setBranchA(mX, 7, 0);
|
||||||
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
pass1 { mVUanalyzeCondBranch1(mVU, _Is_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
@ -1271,7 +1265,6 @@ mVUop(mVU_IBLTZ) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
mVUop(mVU_IBNE) {
|
mVUop(mVU_IBNE) {
|
||||||
using namespace x86Emitter;
|
|
||||||
setBranchA(mX, 8, 0);
|
setBranchA(mX, 8, 0);
|
||||||
pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
|
pass1 { mVUanalyzeCondBranch2(mVU, _Is_, _It_); }
|
||||||
pass2 {
|
pass2 {
|
||||||
|
@ -19,8 +19,6 @@
|
|||||||
#include "iR5900.h"
|
#include "iR5900.h"
|
||||||
#include "R5900OpcodeTables.h"
|
#include "R5900OpcodeTables.h"
|
||||||
|
|
||||||
using namespace x86Emitter;
|
|
||||||
|
|
||||||
extern void _vu0WaitMicro();
|
extern void _vu0WaitMicro();
|
||||||
extern void _vu0FinishMicro();
|
extern void _vu0FinishMicro();
|
||||||
|
|
||||||
|
@ -520,7 +520,6 @@ static __pagealigned u8 mVUsearchXMM[0x1000];
|
|||||||
// Generates a custom optimized block-search function
|
// Generates a custom optimized block-search function
|
||||||
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
|
// Note: Structs must be 16-byte aligned! (GCC doesn't guarantee this)
|
||||||
void mVUcustomSearch() {
|
void mVUcustomSearch() {
|
||||||
using namespace x86Emitter;
|
|
||||||
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
|
HostSys::MemProtect(mVUsearchXMM, 0x1000, Protect_ReadWrite, false);
|
||||||
memset_8<0xcc,0x1000>(mVUsearchXMM);
|
memset_8<0xcc,0x1000>(mVUsearchXMM);
|
||||||
xSetPtr(mVUsearchXMM);
|
xSetPtr(mVUsearchXMM);
|
||||||
|
@ -1969,7 +1969,7 @@ void recVUMI_XTOP( VURegs *VU, int info )
|
|||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
// VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp
|
// VU1XGKICK_MTGSTransfer() - Called by ivuZerorec.cpp
|
||||||
//------------------------------------------------------------------
|
//------------------------------------------------------------------
|
||||||
void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
|
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr)
|
||||||
{
|
{
|
||||||
u32 size;
|
u32 size;
|
||||||
u8* data = ((u8*)pMem + (addr&0x3fff));
|
u8* data = ((u8*)pMem + (addr&0x3fff));
|
||||||
|
@ -61,7 +61,7 @@ struct _vuopinfo {
|
|||||||
|
|
||||||
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
|
void SuperVUAnalyzeOp(VURegs *VU, _vuopinfo *info, _VURegsNum* pCodeRegs);
|
||||||
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
|
int eeVURecompileCode(VURegs *VU, _VURegsNum* regs); // allocates all the necessary regs and returns the indices
|
||||||
void VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
|
void __fastcall VU1XGKICK_MTGSTransfer(u32 *pMem, u32 addr); // used for MTGS in XGKICK
|
||||||
|
|
||||||
extern int vucycle;
|
extern int vucycle;
|
||||||
typedef void (*vFloat)(int regd, int regTemp);
|
typedef void (*vFloat)(int regd, int regTemp);
|
||||||
|
@ -2550,8 +2550,6 @@ void SuperVUCleanupProgram(u32 startpc, int vuindex)
|
|||||||
svutime += (u32)(svufinal.QuadPart - svubase.QuadPart);
|
svutime += (u32)(svufinal.QuadPart - svubase.QuadPart);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
assert(s_vu1esp == 0);
|
|
||||||
|
|
||||||
VU = vuindex ? &VU1 : &VU0;
|
VU = vuindex ? &VU1 : &VU0;
|
||||||
VU->cycle += s_TotalVUCycles;
|
VU->cycle += s_TotalVUCycles;
|
||||||
|
|
||||||
@ -2601,9 +2599,8 @@ __declspec(naked) void SuperVUExecuteProgram(u32 startpc, int vuindex)
|
|||||||
mov s_vuedi, edi
|
mov s_vuedi, edi
|
||||||
mov s_vuebx, ebx
|
mov s_vuebx, ebx
|
||||||
|
|
||||||
#ifdef PCSX2_DEBUG
|
|
||||||
mov s_vu1esp, esp
|
mov s_vu1esp, esp
|
||||||
#endif
|
and esp, -16 // align stack for GCC compliance
|
||||||
|
|
||||||
//stmxcsr s_ssecsr
|
//stmxcsr s_ssecsr
|
||||||
ldmxcsr g_sseVUMXCSR
|
ldmxcsr g_sseVUMXCSR
|
||||||
@ -2629,9 +2626,7 @@ __declspec(naked) static void SuperVUEndProgram()
|
|||||||
mov edi, s_vuedi
|
mov edi, s_vuedi
|
||||||
mov ebx, s_vuebx
|
mov ebx, s_vuebx
|
||||||
|
|
||||||
#ifdef PCSX2_DEBUG
|
mov esp, s_vu1esp // restore from aligned stack
|
||||||
sub s_vu1esp, esp
|
|
||||||
#endif
|
|
||||||
|
|
||||||
call SuperVUCleanupProgram
|
call SuperVUCleanupProgram
|
||||||
jmp s_callstack // so returns correctly
|
jmp s_callstack // so returns correctly
|
||||||
@ -4337,11 +4332,11 @@ void recVUMI_XGKICK_(VURegs *VU)
|
|||||||
_freeXMMregs();
|
_freeXMMregs();
|
||||||
|
|
||||||
OR32ItoM((uptr)&psHu32(GIF_STAT), (GIF_STAT_APATH1 | GIF_STAT_OPH)); // Set PATH1 GIF Status Flags
|
OR32ItoM((uptr)&psHu32(GIF_STAT), (GIF_STAT_APATH1 | GIF_STAT_OPH)); // Set PATH1 GIF Status Flags
|
||||||
PUSH32R(s_XGKICKReg);
|
|
||||||
PUSH32I((uptr)VU->Mem);
|
|
||||||
|
|
||||||
CALLFunc((uptr)VU1XGKICK_MTGSTransfer);
|
xMOV(edx, xRegister32(s_XGKICKReg));
|
||||||
ADD32ItoR(ESP, 8);
|
xMOV(ecx, (uptr)VU->Mem);
|
||||||
|
xCALL(VU1XGKICK_MTGSTransfer);
|
||||||
|
|
||||||
AND32ItoM((uptr)&psHu32(GIF_STAT), ~(GIF_STAT_APATH1 | GIF_STAT_OPH)); // Clear PATH1 GIF Status Flags
|
AND32ItoM((uptr)&psHu32(GIF_STAT), ~(GIF_STAT_APATH1 | GIF_STAT_OPH)); // Clear PATH1 GIF Status Flags
|
||||||
s_ScheduleXGKICK = 0;
|
s_ScheduleXGKICK = 0;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user