diff --git a/Core/MIPS/ARM/ArmAsm.cpp b/Core/MIPS/ARM/ArmAsm.cpp index 12cf33676..d089410cd 100644 --- a/Core/MIPS/ARM/ArmAsm.cpp +++ b/Core/MIPS/ARM/ArmAsm.cpp @@ -65,13 +65,22 @@ void JitAt() MIPSComp::jit->Compile(currentMIPS->pc); } +/* +double testD; + +u32 DoubleTest(u32 sp) { + volatile double local = 1.0; + testD += local; + return (u32)(&local); +} + void ShowPC(u32 sp) { if (currentMIPS) { - WARN_LOG(HLE, "PC : %08x ArmSP : %08x", currentMIPS->pc, sp); + ERROR_LOG(HLE, "ShowPC : %08x ArmSP : %08x", currentMIPS->pc, sp); } else { ERROR_LOG(HLE, "Universe corrupt?"); } -} +}*/ void DisassembleArm(const u8 *data, int size); @@ -90,6 +99,13 @@ void Jit::GenerateFixedCode() SetCC(CC_AL); PUSH(9, R4, R5, R6, R7, R8, R9, R10, R11, _LR); + // Take care to 8-byte align stack for function calls. + // This actually misaligns the stack within the JIT itself but that doesn't really matter + // as the JIT does not use the stack at all. + SUB(_SP, _SP, 4); + + // QuickCallFunction(R3, (void *)&DoubleTest); + // QuickCallFunction(R3, (void *)&ShowPC); // Fixed registers, these are always kept when in Jit context. // R13 cannot be used as it's the stack pointer. @@ -132,7 +148,9 @@ void Jit::GenerateFixedCode() BIC(R0, R0, Operand2(0xFC, 4)); CMP(R1, Operand2(MIPS_EMUHACK_OPCODE >> 24, 4)); SetCC(CC_EQ); - // IDEA - we have 24 bits, why not just use offsets from base of code? + // IDEA - we have 26 bits, why not just use offsets from base of code? + // Another idea: Shift the block number left by two in the op, this would let us do + // LDR(R0, R9, R0, true, true); here, replacing the next two instructions. ADD(R0, R9, Operand2(2, ST_LSL, R0)); LDR(R0, R0); B(R0); @@ -154,6 +172,8 @@ void Jit::GenerateFixedCode() breakpointBailout = GetCodePtr(); + ADD(_SP, _SP, 4); + POP(9, R4, R5, R6, R7, R8, R9, R10, R11, _PC); // Returns // Uncomment if you want to see the output... 
diff --git a/Core/MIPS/ARM/ArmCompLoadStore.cpp b/Core/MIPS/ARM/ArmCompLoadStore.cpp index 5479580c9..b310b8936 100644 --- a/Core/MIPS/ARM/ArmCompLoadStore.cpp +++ b/Core/MIPS/ARM/ArmCompLoadStore.cpp @@ -14,13 +14,35 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. + + +// Optimization ideas: +// +// It's common to see sequences of loads or stores accessing a contiguous set of +// addresses in function prologues/epilogues: +// sw s5, 104(sp) +// sw s4, 100(sp) +// sw s3, 96(sp) +// sw s2, 92(sp) +// sw s1, 88(sp) +// sw s0, 84(sp) +// sw ra, 108(sp) +// mov s4, a0 +// mov s3, a1 +// ... +// Such sequences could easily be detected and turned into nice contiguous +// sequences of ARM stores instead of the current 3 instructions per sw/lw. +// +// Also, if we kept track of the likely register content of a cached register, +// (pointer or data), we could avoid many BIC instructions. + + #include "../../MemMap.h" #include "../MIPSAnalyst.h" #include "../../Config.h" #include "ArmJit.h" #include "ArmRegCache.h" - #define _RS ((op>>21) & 0x1F) #define _RT ((op>>16) & 0x1F) #define _RD ((op>>11) & 0x1F) @@ -44,7 +66,7 @@ namespace MIPSComp // Don't load anything into $zr return; } - switch (o) + switch (o) { case 37: //R(rt) = ReadMem16(addr); break; //lhu Comp_Generic(op);