diff --git a/Core/MIPS/ARM64/Arm64IRAsm.cpp b/Core/MIPS/ARM64/Arm64IRAsm.cpp index 42bee863a2..d623c6cd58 100644 --- a/Core/MIPS/ARM64/Arm64IRAsm.cpp +++ b/Core/MIPS/ARM64/Arm64IRAsm.cpp @@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) { } void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { - BeginWrite(GetMemoryProtectPageSize()); + // This will be used as a writable scratch area, always 32-bit accessible. const u8 *start = AlignCodePage(); + if (DebugProfilerEnabled()) { + ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE); + hooks_.profilerPC = (uint32_t *)GetWritableCodePtr(); + Write32(0); + hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr(); + Write32(0); + } + + const u8 *disasmStart = AlignCodePage(); + BeginWrite(GetMemoryProtectPageSize()); if (jo.useStaticAlloc) { saveStaticRegisters_ = AlignCode16(); @@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { regs_.EmitLoadStaticRegisters(); LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); RET(); - - start = saveStaticRegisters_; } else { saveStaticRegisters_ = nullptr; loadStaticRegisters_ = nullptr; @@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE); LoadStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); outerLoopPCInSCRATCH1_ = GetCodePtr(); MovToPC(SCRATCH1); outerLoop_ = GetCodePtr(); SaveStaticRegisters(); // Advance can change the downcount, so must save/restore RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE); QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); LoadStaticRegisters(); @@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { } 
MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); #ifdef MASKED_PSP_MEMORY ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK); #endif @@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // No block found, let's jit. We don't need to save static regs, they're all callee saved. RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::COMPILING); QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); // Let's just dispatch again, we'll enter the block since we know it's there. @@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { const uint8_t *quitLoop = GetCodePtr(); SetJumpTarget(badCoreState); + WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING); SaveStaticRegisters(); RestoreRoundingMode(true); @@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // Leave this at the end, add more stuff above. if (enableDisasm) { - std::vector lines = DisassembleArm64(start, (int)(GetCodePtr() - start)); + std::vector lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart)); for (auto s : lines) { INFO_LOG(JIT, "%s", s.c_str()); } diff --git a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp index 03b9effd26..99b502c74b 100644 --- a/Core/MIPS/ARM64/Arm64IRCompFPU.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompFPU.cpp @@ -508,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) { auto callFuncF_F = [&](float (*func)(float)) { regs_.FlushBeforeCall(); + WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER); + // It might be in a non-volatile register. // TODO: May have to handle a transfer if SIMD here. 
if (regs_.IsFPRMapped(inst.src1)) { @@ -527,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) { if (regs_.F(inst.dest) != S0) { fp_.FMOV(regs_.F(inst.dest), S0); } + + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); }; switch (inst.op) { diff --git a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp index 7544a0cc03..8fba3c3205 100644 --- a/Core/MIPS/ARM64/Arm64IRCompSystem.cpp +++ b/Core/MIPS/ARM64/Arm64IRCompSystem.cpp @@ -210,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. MOVI2R(W0, inst.constant); @@ -229,6 +230,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) { } #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // This is always followed by an ExitToPC, where we check coreState. break; @@ -236,7 +238,9 @@ void Arm64JitBackend::CompIR_System(IRInst inst) { case IROp::CallReplacement: FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT); QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0); break; diff --git a/Core/MIPS/ARM64/Arm64IRJit.cpp b/Core/MIPS/ARM64/Arm64IRJit.cpp index 88cee8f012..ab7692dcc5 100644 --- a/Core/MIPS/ARM64/Arm64IRJit.cpp +++ b/Core/MIPS/ARM64/Arm64IRJit.cpp @@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); wroteCheckedOffset = true; + WriteDebugPC(startPC); + // Check the sign bit to check if negative. 
FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31); MOVI2R(SCRATCH1, startPC); @@ -129,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) } if (jo.enableBlocklink && jo.useBackJump) { + WriteDebugPC(startPC); + // Small blocks are common, check if it's < 32KB long. ptrdiff_t distance = blockStart - GetCodePointer(); if (distance >= -0x8000 && distance < 0x8000) { @@ -229,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET); MOVI2R(X0, value); QuickCallFunction(SCRATCH2_64, &DoIRInst); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // We only need to check the return value if it's a potential exit. @@ -256,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) { // IR protects us against this being a branching instruction (well, hopefully.) FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET); if (DebugStatsEnabled()) { MOVP2R(X0, MIPSGetName(op)); QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret); } MOVI2R(X0, inst.constant); QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op)); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); } @@ -354,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) { STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc)); } +void Arm64JitBackend::WriteDebugPC(uint32_t pc) { + if (hooks_.profilerPC) { + int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr()); + MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset); + MOVI2R(SCRATCH1, pc); + STR(SCRATCH1, JITBASEREG, SCRATCH2); + } +} + +void Arm64JitBackend::WriteDebugPC(ARM64Reg r) { + if (hooks_.profilerPC) { + int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr()); + MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset); + STR(r, JITBASEREG, SCRATCH2); + } +} + +void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus 
status) { + if (hooks_.profilerPC) { + int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr()); + MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset); + MOVI2R(SCRATCH1, (int)status); + STR(SCRATCH1, JITBASEREG, SCRATCH2); + } +} + void Arm64JitBackend::SaveStaticRegisters() { if (jo.useStaticAlloc) { QuickCallFunction(SCRATCH2_64, saveStaticRegisters_); diff --git a/Core/MIPS/ARM64/Arm64IRJit.h b/Core/MIPS/ARM64/Arm64IRJit.h index c33d289a14..055e525565 100644 --- a/Core/MIPS/ARM64/Arm64IRJit.h +++ b/Core/MIPS/ARM64/Arm64IRJit.h @@ -57,6 +57,11 @@ private: void UpdateRoundingMode(bool force = false); void MovFromPC(Arm64Gen::ARM64Reg r); void MovToPC(Arm64Gen::ARM64Reg r); + // Destroys SCRATCH2. + void WriteDebugPC(uint32_t pc); + void WriteDebugPC(Arm64Gen::ARM64Reg r); + // Destroys SCRATCH2. + void WriteDebugProfilerStatus(IRProfilerStatus status); void SaveStaticRegisters(); void LoadStaticRegisters(); diff --git a/Core/MIPS/IR/IRNativeCommon.cpp b/Core/MIPS/IR/IRNativeCommon.cpp index aa8751b945..784d0c7ae2 100644 --- a/Core/MIPS/IR/IRNativeCommon.cpp +++ b/Core/MIPS/IR/IRNativeCommon.cpp @@ -15,7 +15,9 @@ // Official git repository and contact information can be found at // https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/. +#include #include +#include #include "Common/Profiler/Profiler.h" #include "Common/StringUtils.h" #include "Common/TimeUtil.h" @@ -31,18 +33,57 @@ namespace MIPSComp { // Compile time flag to enable debug stats for not compiled ops. static constexpr bool enableDebugStats = false; +// Compile time flag for enabling the simple IR jit profiler. +static constexpr bool enableDebugProfiler = false; // Used only for debugging when enableDebug is true above. 
static std::map debugSeenNotCompiledIR; static std::map debugSeenNotCompiled; +static std::map, int> debugSeenPCUsage; static double lastDebugStatsLog = 0.0; +static constexpr double debugStatsFrequency = 5.0; + +static std::thread debugProfilerThread; +std::atomic debugProfilerThreadStatus = false; + +template +class IRProfilerTopValues { +public: + void Add(const std::pair &v, int c) { + for (int i = 0; i < N; ++i) { + if (c > counts[i]) { + counts[i] = c; + values[i] = v; + return; + } + } + } + + int counts[N]{}; + std::pair values[N]{}; +}; + +const char *IRProfilerStatusToString(IRProfilerStatus s) { + switch (s) { + case IRProfilerStatus::NOT_RUNNING: return "NOT_RUNNING"; + case IRProfilerStatus::IN_JIT: return "IN_JIT"; + case IRProfilerStatus::TIMER_ADVANCE: return "TIMER_ADVANCE"; + case IRProfilerStatus::COMPILING: return "COMPILING"; + case IRProfilerStatus::MATH_HELPER: return "MATH_HELPER"; + case IRProfilerStatus::REPLACEMENT: return "REPLACEMENT"; + case IRProfilerStatus::SYSCALL: return "SYSCALL"; + case IRProfilerStatus::INTERPRET: return "INTERPRET"; + case IRProfilerStatus::IR_INTERPRET: return "IR_INTERPRET"; + } + return "INVALID"; +} static void LogDebugStats() { - if (!enableDebugStats) + if (!enableDebugStats && !enableDebugProfiler) return; double now = time_now_d(); - if (now < lastDebugStatsLog + 1.0) + if (now < lastDebugStatsLog + debugStatsFrequency) return; lastDebugStatsLog = now; @@ -66,16 +107,36 @@ static void LogDebugStats() { } debugSeenNotCompiled.clear(); + IRProfilerTopValues<4> slowestPCs; + int64_t totalCount = 0; + for (auto it : debugSeenPCUsage) { + slowestPCs.Add(it.first, it.second); + totalCount += it.second; + } + debugSeenPCUsage.clear(); + if (worstIROp != -1) WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal); if (worstName != nullptr) WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal); + if (slowestPCs.counts[0] != 0) { + for (int i = 0; i < 4; ++i) 
{ + uint32_t pc = slowestPCs.values[i].first; + const char *status = IRProfilerStatusToString(slowestPCs.values[i].second); + const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : ""; + WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount); + } + } } bool IRNativeBackend::DebugStatsEnabled() const { return enableDebugStats; } +bool IRNativeBackend::DebugProfilerEnabled() const { + return enableDebugProfiler; +} + void IRNativeBackend::NotifyMIPSInterpret(const char *name) { _assert_(enableDebugStats); debugSeenNotCompiled[name]++; @@ -120,6 +181,13 @@ int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_ IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {} +IRNativeBackend::~IRNativeBackend() { + if (debugProfilerThreadStatus) { + debugProfilerThreadStatus = false; + debugProfilerThread.join(); + } +} + void IRNativeBackend::CompileIRInst(IRInst inst) { switch (inst.op) { case IROp::Nop: @@ -421,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) { // Wanted this to be a reference, but vtbls get in the way. Shouldn't change. hooks_ = backend.GetNativeHooks(); + + if (enableDebugProfiler && hooks_.profilerPC) { + debugProfilerThreadStatus = true; + debugProfilerThread = std::thread([&] { + // Spin, spin spin... maybe could at least hook into sleeps. 
+ while (debugProfilerThreadStatus) { + IRProfilerStatus stat = *hooks_.profilerStatus; + uint32_t pc = *hooks_.profilerPC; + if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) { + debugSeenPCUsage[std::make_pair(pc, stat)]++; + } + } + }); + } } bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) { @@ -432,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) { } void IRNativeJit::RunLoopUntil(u64 globalticks) { - if constexpr (enableDebugStats) { + if constexpr (enableDebugStats || enableDebugProfiler) { LogDebugStats(); } diff --git a/Core/MIPS/IR/IRNativeCommon.h b/Core/MIPS/IR/IRNativeCommon.h index e93786ec07..4afc503698 100644 --- a/Core/MIPS/IR/IRNativeCommon.h +++ b/Core/MIPS/IR/IRNativeCommon.h @@ -25,12 +25,27 @@ namespace MIPSComp { typedef void (*IRNativeFuncNoArg)(); +enum class IRProfilerStatus : int32_t { + NOT_RUNNING, + IN_JIT, + TIMER_ADVANCE, + COMPILING, + MATH_HELPER, + REPLACEMENT, + SYSCALL, + INTERPRET, + IR_INTERPRET, +}; + struct IRNativeHooks { IRNativeFuncNoArg enterDispatcher = nullptr; const uint8_t *dispatcher = nullptr; const uint8_t *dispatchFetch = nullptr; const uint8_t *crashHandler = nullptr; + + uint32_t *profilerPC = nullptr; + IRProfilerStatus *profilerStatus = nullptr; }; struct IRNativeBlockExit { @@ -47,7 +62,7 @@ struct IRNativeBlock { class IRNativeBackend { public: IRNativeBackend(IRBlockCache &blocks); - virtual ~IRNativeBackend() {} + virtual ~IRNativeBackend(); void CompileIRInst(IRInst inst); @@ -120,6 +135,7 @@ protected: // Returns true when debugging statistics should be compiled in. bool DebugStatsEnabled() const; + bool DebugProfilerEnabled() const; // Callback (compile when DebugStatsEnabled()) to log a base interpreter hit. // Call the func returned by MIPSGetInterpretFunc(op) directly for interpret. 
diff --git a/Core/MIPS/RiscV/RiscVAsm.cpp b/Core/MIPS/RiscV/RiscVAsm.cpp index 135e0604e8..730a6d9dcc 100644 --- a/Core/MIPS/RiscV/RiscVAsm.cpp +++ b/Core/MIPS/RiscV/RiscVAsm.cpp @@ -45,8 +45,19 @@ static void ShowPC(u32 downcount, void *membase, void *jitbase) { } void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { - BeginWrite(GetMemoryProtectPageSize()); + // This will be used as a writable scratch area, always 32-bit accessible. const u8 *start = AlignCodePage(); + if (DebugProfilerEnabled()) { + ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE); + hooks_.profilerPC = (uint32_t *)GetWritableCodePtr(); + *hooks_.profilerPC = 0; + hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr() + 1; + *hooks_.profilerStatus = IRProfilerStatus::NOT_RUNNING; + SetCodePointer(GetCodePtr() + sizeof(uint32_t) * 2, GetWritableCodePtr() + sizeof(uint32_t) * 2); + } + + const u8 *disasmStart = AlignCodePage(); + BeginWrite(GetMemoryProtectPageSize()); if (jo.useStaticAlloc) { saveStaticRegisters_ = AlignCode16(); @@ -58,8 +69,6 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { regs_.EmitLoadStaticRegisters(); LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount)); RET(); - - start = saveStaticRegisters_; } else { saveStaticRegisters_ = nullptr; loadStaticRegisters_ = nullptr; @@ -124,14 +133,18 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1); LoadStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); outerLoopPCInSCRATCH1_ = GetCodePtr(); MovToPC(SCRATCH1); outerLoop_ = GetCodePtr(); // Advance can change the downcount (or thread), so must save/restore around it. 
SaveStaticRegisters(); RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE); QuickCallFunction(&CoreTiming::Advance, X7); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); LoadStaticRegisters(); @@ -162,6 +175,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { } LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc)); + WriteDebugPC(SCRATCH1); #ifdef MASKED_PSP_MEMORY LI(SCRATCH2, 0x3FFFFFFF); AND(SCRATCH1, SCRATCH1, SCRATCH2); @@ -180,7 +194,9 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { // No block found, let's jit. We don't need to save static regs, they're all callee saved. RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::COMPILING); QuickCallFunction(&MIPSComp::JitAt, X7); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); // Try again, the block index should be set now. @@ -195,6 +211,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) { const uint8_t *quitLoop = GetCodePtr(); SetJumpTarget(badCoreState); + WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING); SaveStaticRegisters(); RestoreRoundingMode(true); diff --git a/Core/MIPS/RiscV/RiscVCompFPU.cpp b/Core/MIPS/RiscV/RiscVCompFPU.cpp index 2c56ce5962..132ef8e58c 100644 --- a/Core/MIPS/RiscV/RiscVCompFPU.cpp +++ b/Core/MIPS/RiscV/RiscVCompFPU.cpp @@ -585,6 +585,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) { auto callFuncF_F = [&](float (*func)(float)) { regs_.FlushBeforeCall(); + WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER); + // It might be in a non-volatile register. // TODO: May have to handle a transfer if SIMD here. 
if (regs_.IsFPRMapped(inst.src1)) { @@ -600,6 +602,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) { if (regs_.F(inst.dest) != F10) { FMV(32, regs_.F(inst.dest), F10); } + + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); }; RiscVReg tempReg = INVALID_REG; diff --git a/Core/MIPS/RiscV/RiscVCompSystem.cpp b/Core/MIPS/RiscV/RiscVCompSystem.cpp index 878687e64d..4605648ed8 100644 --- a/Core/MIPS/RiscV/RiscVCompSystem.cpp +++ b/Core/MIPS/RiscV/RiscVCompSystem.cpp @@ -188,6 +188,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. LI(X10, (int32_t)inst.constant); @@ -207,6 +208,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) { } #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // This is always followed by an ExitToPC, where we check coreState. break; @@ -214,7 +216,9 @@ void RiscVJitBackend::CompIR_System(IRInst inst) { case IROp::CallReplacement: FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT); QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc, SCRATCH2); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10); break; diff --git a/Core/MIPS/RiscV/RiscVJit.cpp b/Core/MIPS/RiscV/RiscVJit.cpp index 8d3f0155c3..be4a453482 100644 --- a/Core/MIPS/RiscV/RiscVJit.cpp +++ b/Core/MIPS/RiscV/RiscVJit.cpp @@ -67,6 +67,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); wroteCheckedOffset = true; + WriteDebugPC(startPC); + FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO); LI(SCRATCH1, startPC); QuickJ(R_RA, outerLoopPCInSCRATCH1_); @@ -118,6 +120,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) 
} if (jo.enableBlocklink && jo.useBackJump) { + WriteDebugPC(startPC); + // Most blocks shouldn't be >= 4KB, so usually we can just BGE. if (BInRange(blockStart)) { BGE(DOWNCOUNTREG, R_ZERO, blockStart); @@ -218,7 +222,9 @@ void RiscVJitBackend::CompIR_Generic(IRInst inst) { FlushAll(); LI(X10, value, SCRATCH2); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET); QuickCallFunction(&DoIRInst, SCRATCH2); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // We only need to check the return value if it's a potential exit. @@ -241,12 +247,14 @@ void RiscVJitBackend::CompIR_Interpret(IRInst inst) { // IR protects us against this being a branching instruction (well, hopefully.) FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET); if (DebugStatsEnabled()) { LI(X10, MIPSGetName(op)); QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2); } LI(X10, (int32_t)inst.constant); QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); } @@ -329,6 +337,29 @@ void RiscVJitBackend::MovToPC(RiscVReg r) { SW(r, CTXREG, offsetof(MIPSState, pc)); } +void RiscVJitBackend::WriteDebugPC(uint32_t pc) { + if (hooks_.profilerPC) { + LI(SCRATCH2, hooks_.profilerPC); + LI(R_RA, (int32_t)pc); + SW(R_RA, SCRATCH2, 0); + } +} + +void RiscVJitBackend::WriteDebugPC(RiscVReg r) { + if (hooks_.profilerPC) { + LI(SCRATCH2, hooks_.profilerPC); + SW(r, SCRATCH2, 0); + } +} + +void RiscVJitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) { + if (hooks_.profilerPC) { + LI(SCRATCH2, hooks_.profilerStatus); + LI(R_RA, (int)status); + SW(R_RA, SCRATCH2, 0); + } +} + void RiscVJitBackend::SaveStaticRegisters() { if (jo.useStaticAlloc) {
QuickCallFunction(saveStaticRegisters_); diff --git a/Core/MIPS/RiscV/RiscVJit.h b/Core/MIPS/RiscV/RiscVJit.h index 46448c4c71..7ccbcce90b 100644 --- a/Core/MIPS/RiscV/RiscVJit.h +++ b/Core/MIPS/RiscV/RiscVJit.h @@ -50,6 +50,9 @@ private: void ApplyRoundingMode(bool force = false); void MovFromPC(RiscVGen::RiscVReg r); void MovToPC(RiscVGen::RiscVReg r); + void WriteDebugPC(uint32_t pc); + void WriteDebugPC(RiscVGen::RiscVReg r); + void WriteDebugProfilerStatus(IRProfilerStatus status); void SaveStaticRegisters(); void LoadStaticRegisters(); diff --git a/Core/MIPS/x86/X64IRAsm.cpp b/Core/MIPS/x86/X64IRAsm.cpp index 5267c1022a..2e095c4c12 100644 --- a/Core/MIPS/x86/X64IRAsm.cpp +++ b/Core/MIPS/x86/X64IRAsm.cpp @@ -49,8 +49,21 @@ static void ShowPC(void *membase, void *jitbase) { } void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { - BeginWrite(GetMemoryProtectPageSize()); + // This will be used as a writable scratch area, always 32-bit accessible. const u8 *start = AlignCodePage(); + if (DebugProfilerEnabled()) { + ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE); + hooks_.profilerPC = (uint32_t *)GetWritableCodePtr(); + Write32(0); + hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr(); + Write32(0); + } + + EmitFPUConstants(); + EmitVecConstants(); + + const u8 *disasmStart = AlignCodePage(); + BeginWrite(GetMemoryProtectPageSize()); jo.downcountInRegister = false; #if PPSSPP_ARCH(AMD64) @@ -83,8 +96,6 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { if (jo.downcountInRegister) MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset)); RET(); - - start = saveStaticRegisters_; } else { saveStaticRegisters_ = nullptr; loadStaticRegisters_ = nullptr; @@ -146,14 +157,18 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0])); LoadStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); MovFromPC(SCRATCH1); + 
WriteDebugPC(SCRATCH1); outerLoopPCInSCRATCH1_ = GetCodePtr(); MovToPC(SCRATCH1); outerLoop_ = GetCodePtr(); // Advance can change the downcount (or thread), so must save/restore around it. SaveStaticRegisters(); RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE); ABI_CallFunction(reinterpret_cast(&CoreTiming::Advance)); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); LoadStaticRegisters(); @@ -209,6 +224,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { } MovFromPC(SCRATCH1); + WriteDebugPC(SCRATCH1); #ifdef MASKED_PSP_MEMORY AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK)); #endif @@ -247,7 +263,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // No block found, let's jit. We don't need to save static regs, they're all callee saved. RestoreRoundingMode(true); + WriteDebugProfilerStatus(IRProfilerStatus::COMPILING); ABI_CallFunction(&MIPSComp::JitAt); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); ApplyRoundingMode(true); // Let's just dispatch again, we'll enter the block since we know it's there. JMP(dispatcherNoCheck_, true); @@ -265,6 +283,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { const uint8_t *quitLoop = GetCodePtr(); SetJumpTarget(badCoreState); + WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING); SaveStaticRegisters(); RestoreRoundingMode(true); ABI_PopAllCalleeSavedRegsAndAdjustStack(); @@ -283,16 +302,13 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) { // Leave this at the end, add more stuff above. if (enableDisasm) { #if PPSSPP_ARCH(AMD64) - std::vector lines = DisassembleX86(start, (int)(GetCodePtr() - start)); + std::vector lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart)); for (auto s : lines) { INFO_LOG(JIT, "%s", s.c_str()); } #endif } - EmitFPUConstants(); - EmitVecConstants(); - // Let's spare the pre-generated code from unprotect-reprotect. 
AlignCodePage(); jitStartOffset_ = (int)(GetCodePtr() - start); diff --git a/Core/MIPS/x86/X64IRCompFPU.cpp b/Core/MIPS/x86/X64IRCompFPU.cpp index d1b23d6356..13f9075f11 100644 --- a/Core/MIPS/x86/X64IRCompFPU.cpp +++ b/Core/MIPS/x86/X64IRCompFPU.cpp @@ -972,6 +972,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) { auto callFuncF_F = [&](const void *func) { regs_.FlushBeforeCall(); + WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER); #if X64JIT_USE_XMM_CALL if (regs_.IsFPRMapped(inst.src1)) { @@ -1004,6 +1005,8 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) { regs_.MapFPR(inst.dest, MIPSMap::NOINIT); MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1)); #endif + + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); }; switch (inst.op) { diff --git a/Core/MIPS/x86/X64IRCompSystem.cpp b/Core/MIPS/x86/X64IRCompSystem.cpp index 92c8bafd75..9d1723aef5 100644 --- a/Core/MIPS/x86/X64IRCompSystem.cpp +++ b/Core/MIPS/x86/X64IRCompSystem.cpp @@ -203,6 +203,7 @@ void X64JitBackend::CompIR_System(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL); #ifdef USE_PROFILER // When profiling, we can't skip CallSyscall, since it times syscalls. ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant); @@ -219,6 +220,7 @@ void X64JitBackend::CompIR_System(IRInst inst) { } #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // This is always followed by an ExitToPC, where we check coreState. 
break; @@ -226,7 +228,9 @@ void X64JitBackend::CompIR_System(IRInst inst) { case IROp::CallReplacement: FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT); ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); //SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX)); SUB(32, MDisp(CTXREG, downcountOffset), R(EAX)); diff --git a/Core/MIPS/x86/X64IRJit.cpp b/Core/MIPS/x86/X64IRJit.cpp index 6c1a9966bc..98279e3989 100644 --- a/Core/MIPS/x86/X64IRJit.cpp +++ b/Core/MIPS/x86/X64IRJit.cpp @@ -64,6 +64,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) { SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer())); wroteCheckedOffset = true; + WriteDebugPC(startPC); + // TODO: See if we can get flags to always have the downcount compare. if (jo.downcountInRegister) { TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG)); @@ -122,6 +124,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) { } if (jo.enableBlocklink && jo.useBackJump) { + WriteDebugPC(startPC); + if (jo.downcountInRegister) { TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG)); } else { @@ -216,11 +220,13 @@ void X64JitBackend::CompIR_Generic(IRInst inst) { FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET); #if PPSSPP_ARCH(AMD64) ABI_CallFunctionP((const void *)&DoIRInst, (void *)value); #else ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32)); #endif + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); // We only need to check the return value if it's a potential exit. @@ -238,10 +244,12 @@ void X64JitBackend::CompIR_Interpret(IRInst inst) { // IR protects us against this being a branching instruction (well, hopefully.) 
FlushAll(); SaveStaticRegisters(); + WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET); if (DebugStatsEnabled()) { ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op)); } ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant); + WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT); LoadStaticRegisters(); } @@ -346,6 +354,21 @@ void X64JitBackend::MovToPC(X64Reg r) { MOV(32, MDisp(CTXREG, pcOffset), R(r)); } +void X64JitBackend::WriteDebugPC(uint32_t pc) { + if (hooks_.profilerPC) + MOV(32, M(hooks_.profilerPC), Imm32(pc)); +} + +void X64JitBackend::WriteDebugPC(Gen::X64Reg r) { + if (hooks_.profilerPC) + MOV(32, M(hooks_.profilerPC), R(r)); +} + +void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) { + if (hooks_.profilerPC) + MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status)); +} + void X64JitBackend::SaveStaticRegisters() { if (jo.useStaticAlloc) { //CALL(saveStaticRegisters_); diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h index 43a70ff50a..15a2fb9b44 100644 --- a/Core/MIPS/x86/X64IRJit.h +++ b/Core/MIPS/x86/X64IRJit.h @@ -66,6 +66,9 @@ private: void ApplyRoundingMode(bool force = false); void MovFromPC(Gen::X64Reg r); void MovToPC(Gen::X64Reg r); + void WriteDebugPC(uint32_t pc); + void WriteDebugPC(Gen::X64Reg r); + void WriteDebugProfilerStatus(IRProfilerStatus status); void SaveStaticRegisters(); void LoadStaticRegisters();