Bugfix replace function inlining (compilerPC needs to be increased). Misc.

This commit is contained in:
Henrik Rydgard 2013-12-20 15:37:37 +01:00
parent 9e37353571
commit 455a73bba7
10 changed files with 78 additions and 15 deletions

View File

@ -144,6 +144,60 @@ static int Replace_vmmul_q_transp() {
return 16;
}
// TODO: Inline into a few NEON or SSE instructions - especially if a1 is a known immediate!
// Anyway, not sure if worth it. There's not that many matrices written per frame normally.
// Host-side replacement for the guest's dl_write_matrix helper: appends a
// matrix upload to a display list that lives in emulated memory.
//
// Guest arguments (read through PARAM):
//   PARAM(0) - address of the display-list struct; word [2] of that struct
//              holds the address at which the next command will be written.
//   PARAM(1) - matrix selector, 0..3 (see the switch below).
//   PARAM(2) - address of the source matrix, laid out as 4x4 32-bit words.
//
// Output: one "matrix number" command word followed by `count` "matrix data"
// command words. Each data word is (command << 24) | (top 24 bits of the
// source word). The write address in the struct is advanced past what was
// written, and the new address is handed back to the guest via RETURN.
//
// Returns 60 to the replacement dispatcher (presumably the estimated cost of
// the replaced guest function - confirm against the caller).
static int Replace_dl_write_matrix() {
	u32 *dlStruct = (u32 *)Memory::GetPointerUnchecked(PARAM(0));
	u32 *dlPtr = (u32 *)Memory::GetPointerUnchecked(dlStruct[2]);
	u32 *dataPtr = (u32 *)Memory::GetPointerUnchecked(PARAM(2));

	// Command word selecting the matrix; the matching data command is always
	// the next opcode (+0x01000000). Opcode names below follow the GE command
	// table (0x3A world, 0x3C view, 0x3E projection, 0x40 texture).
	u32 matrix;
	int count = 12;
	switch (PARAM(1)) {
	case 3:
		matrix = 0x40000000;  // tex mtx
		break;
	case 2:
		matrix = 0x3A000000;  // world mtx
		break;
	case 1:
		matrix = 0x3C000000;  // view mtx
		break;
	case 0:
		matrix = 0x3E000000;  // proj mtx - the only full 4x4 upload
		count = 16;
		break;
	default:
		// Unknown selector. Previously this fell through with `matrix`
		// uninitialized (undefined behaviour) and wrote garbage commands into
		// the display list. Leave the list untouched and report the unchanged
		// write address instead.
		// NOTE(review): the real guest routine's behaviour for an invalid
		// selector is not known here; this is the conservative choice.
		RETURN(dlStruct[2]);
		return 60;
	}

	*dlPtr++ = matrix;
	matrix += 0x01000000;

	if (count == 16) {
		// Ultra SIMD friendly!
		for (int i = 0; i < count; i++) {
			dlPtr[i] = matrix | (dataPtr[i] >> 8);
		}
	} else {
		// 4x3 upload: three words are taken from each of the four groups of
		// four source words, i.e. source indices 3, 7, 11 and 15 are skipped.
		// Bit tricky to SIMD (note the offsets) but should be doable.
		for (int row = 0; row < 4; row++) {
			for (int col = 0; col < 3; col++) {
				dlPtr[row * 3 + col] = matrix | (dataPtr[row * 4 + col] >> 8);
			}
		}
	}

	// One header word plus `count` data words, 4 bytes each.
	dlStruct[2] += (1 + count) * 4;
	RETURN(dlStruct[2]);
	return 60;
}
// Can either replace with C functions or functions emitted in Asm/ArmAsm.
static const ReplacementTableEntry entries[] = {
// TODO: I think some games can be helped quite a bit by implementing the
@ -154,6 +208,7 @@ static const ReplacementTableEntry entries[] = {
{ "cosf", &Replace_cosf, 0, 0},
{ "sqrtf", &Replace_sqrtf, 0, 0},
{ "atan2f", &Replace_atan2f, 0, 0},
/*
{ "memcpy", &Replace_memcpy, 0, 0},
{ "memmove", &Replace_memmove, 0, 0},
{ "memset", &Replace_memset, 0, 0},
@ -162,7 +217,13 @@ static const ReplacementTableEntry entries[] = {
{ "strncpy", &Replace_strncpy, 0, 0},
{ "strcmp", &Replace_strcmp, 0, 0},
{ "strncmp", &Replace_strncmp, 0, 0},
*/
{ "fabsf", 0, &MIPSComp::Jit::Replace_fabsf, REPFLAG_ALLOWINLINE},
{ "dl_write_matrix", &Replace_dl_write_matrix, 0, 0},
{ "dl_write_matrix_2", &Replace_dl_write_matrix, 0, 0},
// dl_write_matrix_3 doesn't take the dl as a parameter, it accesses a global instead. Need to extract the address of the global from the code when replacing...
// dunno about write_matrix_3 and 4
// { "vmmul_q_transp", &Replace_vmmul_q_transp, 0, 0},
{}
};

View File

@ -24,8 +24,8 @@
namespace MIPSComp {
// JIT-emitted replacement for the guest's fabsf(): instead of calling the
// guest function, emits a VABS that leaves the result in FPR 0.
//
// NOTE(review): this listing comes from a diff rendered without +/- markers,
// so both the old and the new MapDirtyIn/VABS pairs appear below. They differ
// only in the source register (13 vs 12). Presumably the pair using 13 is the
// removed code and the pair using 12 is the fix, matching the other
// Replace_fabsf variant touched by this commit, which reads FPR 12 - confirm
// against the actual file before relying on this.
int Jit::Replace_fabsf() {
fpr.MapDirtyIn(0, 13);
VABS(fpr.R(0), fpr.R(13));
fpr.MapDirtyIn(0, 12);
VABS(fpr.R(0), fpr.R(12));
// The caller (ReplaceJalTo) adds this return value to js.downcountAmount.
return 6; // Number of instructions in the MIPS function
}

View File

@ -453,7 +453,7 @@ skip:
}
}
void AnalyzeFunction(u32 startAddr, u32 size, const char *name) {
void RegisterFunction(u32 startAddr, u32 size, const char *name) {
// Check if we have this already
for (auto iter = functions.begin(); iter != functions.end(); iter++) {
if (iter->start == startAddr) {
@ -485,8 +485,6 @@ skip:
}
void ForgetFunctions(u32 startAddr, u32 endAddr) {
StoreHashMap(GetSysDirectory(DIRECTORY_SYSTEM) + "knownfuncs.ini");
// It makes sense to forget functions as modules are unloaded but it breaks
// the easy way of saving a hashmap by unloading and loading a game. I added
// an alternative way.

View File

@ -97,7 +97,7 @@ namespace MIPSAnalyst
// If we have loaded symbols from the elf, we'll register functions as they are touched
// so that we don't just dump them all in the cache.
void AnalyzeFunction(u32 startAddr, u32 size, const char *name);
void RegisterFunction(u32 startAddr, u32 size, const char *name);
void ScanForFunctions(u32 startAddr, u32 endAddr, bool insertSymbols);
void ForgetFunctions(u32 startAddr, u32 endAddr);
void CompileLeafs();

View File

@ -556,6 +556,9 @@ void Jit::Comp_Jump(MIPSOpcode op)
if (ReplaceJalTo(targetAddr))
return;
// Check for small function inlining (future)
// Save return address - might be overwritten by delay slot.
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);

View File

@ -241,7 +241,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
fpr.MapReg(fd, fd == fs, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
PAND(fpr.RX(fd), M((void *)ssNoSignMask));
fpr.ReleaseSpillLocks();
break;
case 6: //F(fd) = F(fs); break; //mov
@ -249,7 +248,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
fpr.SpillLock(fd, fs);
fpr.MapReg(fd, fd == fs, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
fpr.ReleaseSpillLocks();
}
break;
@ -258,7 +256,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
fpr.MapReg(fd, fd == fs, true);
MOVSS(fpr.RX(fd), fpr.R(fs));
PXOR(fpr.RX(fd), M((void *)ssSignBits2));
fpr.ReleaseSpillLocks();
break;
@ -266,8 +263,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
fpr.SpillLock(fd, fs); // this probably works, just badly tested
fpr.MapReg(fd, fd == fs, true);
SQRTSS(fpr.RX(fd), fpr.R(fs));
fpr.ReleaseSpillLocks();
return;
break;
case 13: //FsI(fd) = F(fs)>=0 ? (int)floorf(F(fs)) : (int)ceilf(F(fs)); break;//trunc.w.s
{
@ -289,7 +285,6 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
SetJumpTarget(skip);
MOV(32, fpr.R(fd), R(EAX));
fpr.ReleaseSpillLocks();
}
break;
@ -305,9 +300,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
default:
Comp_Generic(op);
DISABLE;
return;
}
fpr.ReleaseSpillLocks();
}
void Jit::Comp_mxc1(MIPSOpcode op)

View File

@ -24,6 +24,7 @@ static const u64 MEMORY_ALIGNED16(ssNoSignMask[2]) = {0x7FFFFFFF7FFFFFFFULL, 0x7
namespace MIPSComp {
int Jit::Replace_fabsf() {
fpr.SpillLock(0, 12);
fpr.MapReg(0, MAP_DIRTY | MAP_NOINIT);
MOVSS(fpr.RX(0), fpr.R(12));
ANDPS(fpr.RX(0), M((void *)&ssNoSignMask));

View File

@ -422,6 +422,7 @@ bool Jit::ReplaceJalTo(u32 dest) {
MIPSReplaceFunc repl = entry->jitReplaceFunc;
int cycles = (this->*repl)();
js.downcountAmount += cycles;
js.compilerPC += 4;
// No writing exits, keep going!
// Add a trigger so that if the inlined code changes, we invalidate this block.

View File

@ -919,7 +919,7 @@ void CtrlDisAsmView::onMouseUp(WPARAM wParam, LPARAM lParam, int button)
if (InputBox_GetString(MainWindow::GetHInstance(), MainWindow::GetHWND(), L"New function name", name, newname)) {
symbolMap.SetLabelName(newname.c_str(),funcBegin);
u32 funcSize = symbolMap.GetFunctionSize(curAddress);
MIPSAnalyst::AnalyzeFunction(funcBegin, funcSize, newname.c_str());
MIPSAnalyst::RegisterFunction(funcBegin, funcSize, newname.c_str());
MIPSAnalyst::UpdateHashMap();
MIPSAnalyst::ApplyHashMap();
SendMessage(GetParent(wnd),WM_DEB_MAPLOADED,0,0);

View File

@ -51,9 +51,10 @@
26cc90cb25af9d27:476 = log10
2774614d57d4baa2:28 = vsub_q
279c6bf9cf99cc85:436 = strncpy
2876ed93c5fd1211:328 = dl_write_matrix_4
2965b1ad3ca15cc1:44 = vtfm_t
299a370587df078f:116 = strange_copy_routine
2abca53599f09ea7:608 = dl_write_matrix_2
2abca53599f09ea7:608 = dl_write_matrix_3
2adb92e8855c454e:48 = vtfm_q
2adc229bef7bbc75:40 = isnan
2bcf5268dd26345a:340 = acos
@ -71,6 +72,7 @@
32ceb9a7f72b9385:440 = _strtoul_r
32e6bc7c151491ed:68 = memchr
335df69db1073a8d:96 = wcscpy
35d3527ff8c22ff2:56 = matrix_scale_q
373ce518eee5a2d2:20 = matrix300_store_q
388043e96b0e11fd:144 = dl_write_material2
38f19bc3be215acc:388 = log10f
@ -248,6 +250,7 @@ c319f0d107dd2f45:888 = __muldf3
c35c10300b6b6091:620 = floor
c51519f5dab342d4:224 = cosf
c52c14b9af8c3008:76 = memcmp
c54eae62622f1e11:164 = dl_write_bone_matrix_load
c96e3a087ebf49a9:100 = dl_write_light_color
c96e3a087ebf49a9:-1 = dl_write_light_color_2
cb7a2edd603ecfef:48 = vtfm_p