mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-26 23:10:38 +00:00
Bugfix replace function inlining (compilerPC needs to be increased). Misc.
This commit is contained in:
parent
9e37353571
commit
455a73bba7
@ -144,6 +144,60 @@ static int Replace_vmmul_q_transp() {
|
||||
return 16;
|
||||
}
|
||||
|
||||
// TODO: Inline into a few NEON or SSE instructions - especially if a1 is a known immediate!
|
||||
// Anyway, not sure if worth it. There's not that many matrices written per frame normally.
|
||||
static int Replace_dl_write_matrix() {
|
||||
u32 *dlStruct = (u32 *)Memory::GetPointerUnchecked(PARAM(0));
|
||||
u32 *dlPtr = (u32 *)Memory::GetPointerUnchecked(dlStruct[2]);
|
||||
u32 *dataPtr = (u32 *)Memory::GetPointerUnchecked(PARAM(2));
|
||||
|
||||
u32 matrix;
|
||||
int count = 12;
|
||||
switch (PARAM(1)) {
|
||||
case 3:
|
||||
matrix = 0x40000000; // tex mtx
|
||||
break;
|
||||
case 2:
|
||||
matrix = 0x3A000000;
|
||||
break;
|
||||
case 1:
|
||||
matrix = 0x3C000000;
|
||||
break;
|
||||
case 0:
|
||||
matrix = 0x3E000000;
|
||||
count = 16;
|
||||
break;
|
||||
}
|
||||
|
||||
*dlPtr++ = matrix;
|
||||
matrix += 0x01000000;
|
||||
|
||||
if (count == 16) {
|
||||
// Ultra SIMD friendly!
|
||||
for (int i = 0; i < count; i++) {
|
||||
dlPtr[i] = matrix | (dataPtr[i] >> 8);
|
||||
}
|
||||
} else {
|
||||
// Bit tricky to SIMD (note the offsets) but should be doable
|
||||
dlPtr[0] = matrix | (dataPtr[0] >> 8);
|
||||
dlPtr[1] = matrix | (dataPtr[1] >> 8);
|
||||
dlPtr[2] = matrix | (dataPtr[2] >> 8);
|
||||
dlPtr[3] = matrix | (dataPtr[4] >> 8);
|
||||
dlPtr[4] = matrix | (dataPtr[5] >> 8);
|
||||
dlPtr[5] = matrix | (dataPtr[6] >> 8);
|
||||
dlPtr[6] = matrix | (dataPtr[8] >> 8);
|
||||
dlPtr[7] = matrix | (dataPtr[9] >> 8);
|
||||
dlPtr[8] = matrix | (dataPtr[10] >> 8);
|
||||
dlPtr[9] = matrix | (dataPtr[12] >> 8);
|
||||
dlPtr[10] = matrix | (dataPtr[13] >> 8);
|
||||
dlPtr[11] = matrix | (dataPtr[14] >> 8);
|
||||
}
|
||||
|
||||
dlStruct[2] += (1 + count) * 4;
|
||||
RETURN(dlStruct[2]);
|
||||
return 60;
|
||||
}
|
||||
|
||||
// Can either replace with C functions or functions emitted in Asm/ArmAsm.
|
||||
static const ReplacementTableEntry entries[] = {
|
||||
// TODO: I think some games can be helped quite a bit by implementing the
|
||||
@ -154,6 +208,7 @@ static const ReplacementTableEntry entries[] = {
|
||||
{ "cosf", &Replace_cosf, 0, 0},
|
||||
{ "sqrtf", &Replace_sqrtf, 0, 0},
|
||||
{ "atan2f", &Replace_atan2f, 0, 0},
|
||||
/*
|
||||
{ "memcpy", &Replace_memcpy, 0, 0},
|
||||
{ "memmove", &Replace_memmove, 0, 0},
|
||||
{ "memset", &Replace_memset, 0, 0},
|
||||
@ -162,7 +217,13 @@ static const ReplacementTableEntry entries[] = {
|
||||
{ "strncpy", &Replace_strncpy, 0, 0},
|
||||
{ "strcmp", &Replace_strcmp, 0, 0},
|
||||
{ "strncmp", &Replace_strncmp, 0, 0},
|
||||
*/
|
||||
{ "fabsf", 0, &MIPSComp::Jit::Replace_fabsf, REPFLAG_ALLOWINLINE},
|
||||
{ "dl_write_matrix", &Replace_dl_write_matrix, 0, 0},
|
||||
{ "dl_write_matrix_2", &Replace_dl_write_matrix, 0, 0},
|
||||
// dl_write_matrix_3 doesn't take the dl as a parameter, it accesses a global instead. Need to extract the address of the global from the code when replacing...
|
||||
// dunno about write_matrix_3 and 4
|
||||
|
||||
// { "vmmul_q_transp", &Replace_vmmul_q_transp, 0, 0},
|
||||
{}
|
||||
};
|
||||
|
@ -24,8 +24,8 @@
|
||||
namespace MIPSComp {
|
||||
|
||||
// JIT-inlined replacement for the guest's fabsf: emits a single VABS that writes the
// absolute value of FPR 12 into FPR 0.
//
// FPR 12 is presumably the first float argument register and FPR 0 the float return
// register of the guest calling convention - TODO confirm. The x86 implementation of
// this replacement also reads FPR 12 and writes FPR 0.
//
// The earlier mapping/VABS pair that read FPR 13 is intentionally gone: its result was
// immediately overwritten, so it only cost a register mapping and a dead instruction.
//
// Returns the number of instructions in the replaced MIPS function.
int Jit::Replace_fabsf() {
	fpr.MapDirtyIn(0, 12);
	VABS(fpr.R(0), fpr.R(12));
	return 6;  // Number of instructions in the MIPS function
}
|
||||
|
||||
|
@ -453,7 +453,7 @@ skip:
|
||||
}
|
||||
}
|
||||
|
||||
void AnalyzeFunction(u32 startAddr, u32 size, const char *name) {
|
||||
void RegisterFunction(u32 startAddr, u32 size, const char *name) {
|
||||
// Check if we have this already
|
||||
for (auto iter = functions.begin(); iter != functions.end(); iter++) {
|
||||
if (iter->start == startAddr) {
|
||||
@ -485,8 +485,6 @@ skip:
|
||||
}
|
||||
|
||||
void ForgetFunctions(u32 startAddr, u32 endAddr) {
|
||||
StoreHashMap(GetSysDirectory(DIRECTORY_SYSTEM) + "knownfuncs.ini");
|
||||
|
||||
// It makes sense to forget functions as modules are unloaded but it breaks
|
||||
// the easy way of saving a hashmap by unloading and loading a game. I added
|
||||
// an alternative way.
|
||||
|
@ -97,7 +97,7 @@ namespace MIPSAnalyst
|
||||
|
||||
// If we have loaded symbols from the elf, we'll register functions as they are touched
|
||||
// so that we don't just dump them all in the cache.
|
||||
void AnalyzeFunction(u32 startAddr, u32 size, const char *name);
|
||||
void RegisterFunction(u32 startAddr, u32 size, const char *name);
|
||||
void ScanForFunctions(u32 startAddr, u32 endAddr, bool insertSymbols);
|
||||
void ForgetFunctions(u32 startAddr, u32 endAddr);
|
||||
void CompileLeafs();
|
||||
|
@ -556,6 +556,9 @@ void Jit::Comp_Jump(MIPSOpcode op)
|
||||
if (ReplaceJalTo(targetAddr))
|
||||
return;
|
||||
|
||||
// Check for small function inlining (future)
|
||||
|
||||
|
||||
// Save return address - might be overwritten by delay slot.
|
||||
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
|
||||
CompileDelaySlot(DELAYSLOT_NICE);
|
||||
|
@ -241,7 +241,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
fpr.MapReg(fd, fd == fs, true);
|
||||
MOVSS(fpr.RX(fd), fpr.R(fs));
|
||||
PAND(fpr.RX(fd), M((void *)ssNoSignMask));
|
||||
fpr.ReleaseSpillLocks();
|
||||
break;
|
||||
|
||||
case 6: //F(fd) = F(fs); break; //mov
|
||||
@ -249,7 +248,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
fpr.SpillLock(fd, fs);
|
||||
fpr.MapReg(fd, fd == fs, true);
|
||||
MOVSS(fpr.RX(fd), fpr.R(fs));
|
||||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
break;
|
||||
|
||||
@ -258,7 +256,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
fpr.MapReg(fd, fd == fs, true);
|
||||
MOVSS(fpr.RX(fd), fpr.R(fs));
|
||||
PXOR(fpr.RX(fd), M((void *)ssSignBits2));
|
||||
fpr.ReleaseSpillLocks();
|
||||
break;
|
||||
|
||||
|
||||
@ -266,8 +263,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
fpr.SpillLock(fd, fs); // this probably works, just badly tested
|
||||
fpr.MapReg(fd, fd == fs, true);
|
||||
SQRTSS(fpr.RX(fd), fpr.R(fs));
|
||||
fpr.ReleaseSpillLocks();
|
||||
return;
|
||||
break;
|
||||
|
||||
case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break;//trunc.w.s
|
||||
{
|
||||
@ -289,7 +285,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
|
||||
SetJumpTarget(skip);
|
||||
MOV(32, fpr.R(fd), R(EAX));
|
||||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
break;
|
||||
|
||||
@ -305,9 +300,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
|
||||
case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
|
||||
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
|
||||
default:
|
||||
Comp_Generic(op);
|
||||
DISABLE;
|
||||
return;
|
||||
}
|
||||
fpr.ReleaseSpillLocks();
|
||||
}
|
||||
|
||||
void Jit::Comp_mxc1(MIPSOpcode op)
|
||||
|
@ -24,6 +24,7 @@ static const u64 MEMORY_ALIGNED16(ssNoSignMask[2]) = {0x7FFFFFFF7FFFFFFFULL, 0x7
|
||||
namespace MIPSComp {
|
||||
|
||||
int Jit::Replace_fabsf() {
|
||||
fpr.SpillLock(0, 12);
|
||||
fpr.MapReg(0, MAP_DIRTY | MAP_NOINIT);
|
||||
MOVSS(fpr.RX(0), fpr.R(12));
|
||||
ANDPS(fpr.RX(0), M((void *)&ssNoSignMask));
|
||||
|
@ -422,6 +422,7 @@ bool Jit::ReplaceJalTo(u32 dest) {
|
||||
MIPSReplaceFunc repl = entry->jitReplaceFunc;
|
||||
int cycles = (this->*repl)();
|
||||
js.downcountAmount += cycles;
|
||||
js.compilerPC += 4;
|
||||
// No writing exits, keep going!
|
||||
|
||||
// Add a trigger so that if the inlined code changes, we invalidate this block.
|
||||
|
@ -919,7 +919,7 @@ void CtrlDisAsmView::onMouseUp(WPARAM wParam, LPARAM lParam, int button)
|
||||
if (InputBox_GetString(MainWindow::GetHInstance(), MainWindow::GetHWND(), L"New function name", name, newname)) {
|
||||
symbolMap.SetLabelName(newname.c_str(),funcBegin);
|
||||
u32 funcSize = symbolMap.GetFunctionSize(curAddress);
|
||||
MIPSAnalyst::AnalyzeFunction(funcBegin, funcSize, newname.c_str());
|
||||
MIPSAnalyst::RegisterFunction(funcBegin, funcSize, newname.c_str());
|
||||
MIPSAnalyst::UpdateHashMap();
|
||||
MIPSAnalyst::ApplyHashMap();
|
||||
SendMessage(GetParent(wnd),WM_DEB_MAPLOADED,0,0);
|
||||
|
@ -51,9 +51,10 @@
|
||||
26cc90cb25af9d27:476 = log10
|
||||
2774614d57d4baa2:28 = vsub_q
|
||||
279c6bf9cf99cc85:436 = strncpy
|
||||
2876ed93c5fd1211:328 = dl_write_matrix_4
|
||||
2965b1ad3ca15cc1:44 = vtfm_t
|
||||
299a370587df078f:116 = strange_copy_routine
|
||||
2abca53599f09ea7:608 = dl_write_matrix_2
|
||||
2abca53599f09ea7:608 = dl_write_matrix_3
|
||||
2adb92e8855c454e:48 = vtfm_q
|
||||
2adc229bef7bbc75:40 = isnan
|
||||
2bcf5268dd26345a:340 = acos
|
||||
@ -71,6 +72,7 @@
|
||||
32ceb9a7f72b9385:440 = _strtoul_r
|
||||
32e6bc7c151491ed:68 = memchr
|
||||
335df69db1073a8d:96 = wcscpy
|
||||
35d3527ff8c22ff2:56 = matrix_scale_q
|
||||
373ce518eee5a2d2:20 = matrix300_store_q
|
||||
388043e96b0e11fd:144 = dl_write_material2
|
||||
38f19bc3be215acc:388 = log10f
|
||||
@ -248,6 +250,7 @@ c319f0d107dd2f45:888 = __muldf3
|
||||
c35c10300b6b6091:620 = floor
|
||||
c51519f5dab342d4:224 = cosf
|
||||
c52c14b9af8c3008:76 = memcmp
|
||||
c54eae62622f1e11:164 = dl_write_bone_matrix_load
|
||||
c96e3a087ebf49a9:100 = dl_write_light_color
|
||||
c96e3a087ebf49a9:-1 = dl_write_light_color_2
|
||||
cb7a2edd603ecfef:48 = vtfm_p
|
||||
|
Loading…
Reference in New Issue
Block a user