IR: Add mini native jit MIPS block profiler.

This commit is contained in:
Unknown W. Brackets 2023-09-10 09:49:14 -07:00
parent 06a1f0b72c
commit 9b2fa46861
17 changed files with 290 additions and 18 deletions

View File

@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) {
}
void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
BeginWrite(GetMemoryProtectPageSize());
// This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
if (DebugProfilerEnabled()) {
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
Write32(0);
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
Write32(0);
}
const u8 *disasmStart = AlignCodePage();
BeginWrite(GetMemoryProtectPageSize());
if (jo.useStaticAlloc) {
saveStaticRegisters_ = AlignCode16();
@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
regs_.EmitLoadStaticRegisters();
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();
start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);
LoadStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();
@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
}
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK);
#endif
@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
// Let's just dispatch again, we'll enter the block since we know it's there.
@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);
WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);
@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// Leave this at the end, add more stuff above.
if (enableDisasm) {
std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
std::vector<std::string> lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart));
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}

View File

@ -508,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
// It might be in a non-volatile register.
// TODO: May have to handle a transfer if SIMD here.
if (regs_.IsFPRMapped(inst.src1)) {
@ -527,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
if (regs_.F(inst.dest) != S0) {
fp_.FMOV(regs_.F(inst.dest), S0);
}
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
switch (inst.op) {

View File

@ -210,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
MOVI2R(W0, inst.constant);
@ -229,6 +230,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
}
#endif
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
@ -236,7 +238,9 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0);
break;

View File

@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
WriteDebugPC(startPC);
// Check the sign bit to check if negative.
FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
MOVI2R(SCRATCH1, startPC);
@ -129,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}
if (jo.enableBlocklink && jo.useBackJump) {
WriteDebugPC(startPC);
// Small blocks are common, check if it's < 32KB long.
ptrdiff_t distance = blockStart - GetCodePointer();
if (distance >= -0x8000 && distance < 0x8000) {
@ -229,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) {
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
MOVI2R(X0, value);
QuickCallFunction(SCRATCH2_64, &DoIRInst);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// We only need to check the return value if it's a potential exit.
@ -256,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
MOVP2R(X0, MIPSGetName(op));
QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
}
MOVI2R(X0, inst.constant);
QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}
@ -354,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) {
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
// Emits code that records the given (constant) MIPS PC into the profiler's
// PC slot, so the sampling thread can attribute time to it.
// No-op when the debug profiler slots weren't allocated (profilerPC is null).
// Clobbers SCRATCH1 and SCRATCH2.
void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
if (hooks_.profilerPC) {
// JITBASEREG holds GetBasePtr() - MIPS_EMUHACK_OPCODE, so adding the
// EMUHACK opcode back plus the slot's offset from the base yields the
// slot's absolute address via a register-offset store.
int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
MOVI2R(SCRATCH1, pc);
// [JITBASEREG + SCRATCH2] = pc
STR(SCRATCH1, JITBASEREG, SCRATCH2);
}
}
// Emits code that stores the MIPS PC held in register r into the profiler's
// PC slot. No-op when the debug profiler slots weren't allocated.
// Clobbers SCRATCH2; r is only read.
void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
if (hooks_.profilerPC) {
// Address the slot relative to JITBASEREG (which holds
// GetBasePtr() - MIPS_EMUHACK_OPCODE), keeping the emitted code
// position-independent of the host pointer value.
int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
STR(r, JITBASEREG, SCRATCH2);
}
}
// Emits code that publishes what the jit is currently doing into the
// profiler's status slot for the sampling thread.
// The guard checks profilerPC: both slots are allocated together in
// GenerateFixedCode, so one null check covers both.
// Clobbers SCRATCH1 and SCRATCH2.
void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
if (hooks_.profilerPC) {
int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
MOVI2R(SCRATCH1, (int)status);
// [JITBASEREG + SCRATCH2] = status (32-bit store.)
STR(SCRATCH1, JITBASEREG, SCRATCH2);
}
}
void Arm64JitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);

View File

@ -57,6 +57,11 @@ private:
void UpdateRoundingMode(bool force = false);
void MovFromPC(Arm64Gen::ARM64Reg r);
void MovToPC(Arm64Gen::ARM64Reg r);
// Destroys SCRATCH2.
void WriteDebugPC(uint32_t pc);
void WriteDebugPC(Arm64Gen::ARM64Reg r);
// Destroys SCRATCH2.
void WriteDebugProfilerStatus(IRProfilerStatus status);
void SaveStaticRegisters();
void LoadStaticRegisters();

View File

@ -15,7 +15,9 @@
// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include <atomic>
#include <climits>
#include <thread>
#include "Common/Profiler/Profiler.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
@ -31,18 +33,57 @@ namespace MIPSComp {
// Compile time flag to enable debug stats for not compiled ops.
static constexpr bool enableDebugStats = false;
// Compile time flag for enabling the simple IR jit profiler.
static constexpr bool enableDebugProfiler = false;
// Used only for debugging when enableDebug is true above.
// Histogram of IR ops that fell back to the IR interpreter.
static std::map<uint8_t, int> debugSeenNotCompiledIR;
// Histogram of MIPS ops that fell back to the base interpreter, by name.
static std::map<const char *, int> debugSeenNotCompiled;
// Profiler samples: (pc, status) -> number of times the sampler saw it.
static std::map<std::pair<uint32_t, IRProfilerStatus>, int> debugSeenPCUsage;
static double lastDebugStatsLog = 0.0;
// Minimum seconds between stats log dumps.
static constexpr double debugStatsFrequency = 5.0;
// Spinning sampler thread, started when the debug profiler is enabled.
static std::thread debugProfilerThread;
// Set to false to ask the sampler thread to exit before joining it.
// NOTE(review): not static, unlike its neighbors — presumably unintentional;
// confirm nothing outside this file links against it before narrowing.
std::atomic<bool> debugProfilerThreadStatus = false;
// Keeps the N most frequently sampled (pc, status) pairs, sorted by count
// in descending order. counts[i] and values[i] are parallel arrays.
template <int N>
class IRProfilerTopValues {
public:
	// Record sample v with hit count c, keeping only the N largest counts.
	// Fix: the previous version overwrote the first smaller slot, discarding
	// its old occupant even though it might still belong in the top N
	// (e.g. inserting 5, 3, 7 with N=4 produced [7,3,0,0], losing the 5).
	// Shifting lower entries down maintains a true, sorted top-N.
	void Add(const std::pair<uint32_t, IRProfilerStatus> &v, int c) {
		for (int i = 0; i < N; ++i) {
			if (c > counts[i]) {
				// Make room: push smaller entries down one slot, dropping the last.
				for (int j = N - 1; j > i; --j) {
					counts[j] = counts[j - 1];
					values[j] = values[j - 1];
				}
				counts[i] = c;
				values[i] = v;
				return;
			}
		}
	}

	int counts[N]{};
	std::pair<uint32_t, IRProfilerStatus> values[N]{};
};
// Maps an IRProfilerStatus to a stable, human-readable name for profiler
// log output. Any value outside the enum falls through to "INVALID".
const char *IRProfilerStatusToString(IRProfilerStatus s) {
	switch (s) {
	case IRProfilerStatus::NOT_RUNNING:
		return "NOT_RUNNING";
	case IRProfilerStatus::IN_JIT:
		return "IN_JIT";
	case IRProfilerStatus::TIMER_ADVANCE:
		return "TIMER_ADVANCE";
	case IRProfilerStatus::COMPILING:
		return "COMPILING";
	case IRProfilerStatus::MATH_HELPER:
		return "MATH_HELPER";
	case IRProfilerStatus::REPLACEMENT:
		return "REPLACEMENT";
	case IRProfilerStatus::SYSCALL:
		return "SYSCALL";
	case IRProfilerStatus::INTERPRET:
		return "INTERPRET";
	case IRProfilerStatus::IR_INTERPRET:
		return "IR_INTERPRET";
	}
	// No default case above, so the compiler can warn on a missing enumerator.
	return "INVALID";
}
static void LogDebugStats() {
if (!enableDebugStats)
if (!enableDebugStats && !enableDebugProfiler)
return;
double now = time_now_d();
if (now < lastDebugStatsLog + 1.0)
if (now < lastDebugStatsLog + debugStatsFrequency)
return;
lastDebugStatsLog = now;
@ -66,16 +107,36 @@ static void LogDebugStats() {
}
debugSeenNotCompiled.clear();
IRProfilerTopValues<4> slowestPCs;
int64_t totalCount = 0;
for (auto it : debugSeenPCUsage) {
slowestPCs.Add(it.first, it.second);
totalCount += it.second;
}
debugSeenPCUsage.clear();
if (worstIROp != -1)
WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
if (worstName != nullptr)
WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
if (slowestPCs.counts[0] != 0) {
for (int i = 0; i < 4; ++i) {
uint32_t pc = slowestPCs.values[i].first;
const char *status = IRProfilerStatusToString(slowestPCs.values[i].second);
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : "";
WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount);
}
}
}
// Compile-time switch: whether per-op "not compiled" stats code is emitted.
bool IRNativeBackend::DebugStatsEnabled() const {
return enableDebugStats;
}
// Compile-time switch: whether the sampling profiler scaffolding is emitted.
bool IRNativeBackend::DebugProfilerEnabled() const {
return enableDebugProfiler;
}
void IRNativeBackend::NotifyMIPSInterpret(const char *name) {
_assert_(enableDebugStats);
debugSeenNotCompiled[name]++;
@ -120,6 +181,13 @@ int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_
// Just binds the shared block cache; no other setup.
IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}
// Shuts down the debug profiler's sampling thread, if it was started.
IRNativeBackend::~IRNativeBackend() {
if (debugProfilerThreadStatus) {
// Flag the spinning sampler thread to exit its loop, then wait for it.
debugProfilerThreadStatus = false;
debugProfilerThread.join();
}
}
void IRNativeBackend::CompileIRInst(IRInst inst) {
switch (inst.op) {
case IROp::Nop:
@ -421,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) {
// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
hooks_ = backend.GetNativeHooks();
if (enableDebugProfiler && hooks_.profilerPC) {
debugProfilerThreadStatus = true;
debugProfilerThread = std::thread([&] {
// Spin, spin spin... maybe could at least hook into sleeps.
while (debugProfilerThreadStatus) {
IRProfilerStatus stat = *hooks_.profilerStatus;
uint32_t pc = *hooks_.profilerPC;
if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) {
debugSeenPCUsage[std::make_pair(pc, stat)]++;
}
}
});
}
}
bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
@ -432,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
}
void IRNativeJit::RunLoopUntil(u64 globalticks) {
if constexpr (enableDebugStats) {
if constexpr (enableDebugStats || enableDebugProfiler) {
LogDebugStats();
}

View File

@ -25,12 +25,27 @@ namespace MIPSComp {
typedef void (*IRNativeFuncNoArg)();
// What the jit (or the host code it called out to) is currently doing,
// published for the sampling profiler. Sized int32_t because the backends
// write it as a raw 32-bit store into the profilerStatus slot.
enum class IRProfilerStatus : int32_t {
NOT_RUNNING,
IN_JIT,
// Calling CoreTiming::Advance from the outer loop.
TIMER_ADVANCE,
// Compiling a block that wasn't found in the cache.
COMPILING,
// Inside a float math helper call (e.g. sin/cos).
MATH_HELPER,
// Inside a function replacement call.
REPLACEMENT,
SYSCALL,
// Falling back to the base MIPS interpreter for one op.
INTERPRET,
// Falling back to the IR interpreter for one op.
IR_INTERPRET,
};
// Entry points and pointers into the backend's generated-code region,
// filled in by the backend's GenerateFixedCode().
struct IRNativeHooks {
IRNativeFuncNoArg enterDispatcher = nullptr;
const uint8_t *dispatcher = nullptr;
const uint8_t *dispatchFetch = nullptr;
const uint8_t *crashHandler = nullptr;
// Debug profiler slots; non-null only when the profiler is enabled.
// Written by jitted code, polled by the sampling thread.
// NOTE(review): the cross-thread reads/writes have no explicit
// synchronization — looks racy-by-design for a debug sampler; confirm
// that's acceptable on all target memory models.
uint32_t *profilerPC = nullptr;
IRProfilerStatus *profilerStatus = nullptr;
};
struct IRNativeBlockExit {
@ -47,7 +62,7 @@ struct IRNativeBlock {
class IRNativeBackend {
public:
IRNativeBackend(IRBlockCache &blocks);
virtual ~IRNativeBackend() {}
virtual ~IRNativeBackend();
void CompileIRInst(IRInst inst);
@ -120,6 +135,7 @@ protected:
// Returns true when debugging statistics should be compiled in.
bool DebugStatsEnabled() const;
bool DebugProfilerEnabled() const;
// Callback (compile when DebugStatsEnabled()) to log a base interpreter hit.
// Call the func returned by MIPSGetInterpretFunc(op) directly for interpret.

View File

@ -45,8 +45,19 @@ static void ShowPC(u32 downcount, void *membase, void *jitbase) {
}
void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
BeginWrite(GetMemoryProtectPageSize());
// This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
if (DebugProfilerEnabled()) {
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
*hooks_.profilerPC = 0;
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr() + 1;
*hooks_.profilerStatus = IRProfilerStatus::NOT_RUNNING;
SetCodePointer(GetCodePtr() + sizeof(uint32_t) * 2, GetWritableCodePtr() + sizeof(uint32_t) * 2);
}
const u8 *disasmStart = AlignCodePage();
BeginWrite(GetMemoryProtectPageSize());
if (jo.useStaticAlloc) {
saveStaticRegisters_ = AlignCode16();
@ -58,8 +69,6 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
regs_.EmitLoadStaticRegisters();
LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
RET();
start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
@ -124,14 +133,18 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1);
LoadStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
// Advance can change the downcount (or thread), so must save/restore around it.
SaveStaticRegisters();
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
QuickCallFunction(&CoreTiming::Advance, X7);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();
@ -162,6 +175,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
}
LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc));
WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
LI(SCRATCH2, 0x3FFFFFFF);
AND(SCRATCH1, SCRATCH1, SCRATCH2);
@ -180,7 +194,9 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
QuickCallFunction(&MIPSComp::JitAt, X7);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
// Try again, the block index should be set now.
@ -195,6 +211,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);
WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);

View File

@ -585,6 +585,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
auto callFuncF_F = [&](float (*func)(float)) {
regs_.FlushBeforeCall();
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
// It might be in a non-volatile register.
// TODO: May have to handle a transfer if SIMD here.
if (regs_.IsFPRMapped(inst.src1)) {
@ -600,6 +602,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
if (regs_.F(inst.dest) != F10) {
FMV(32, regs_.F(inst.dest), F10);
}
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
RiscVReg tempReg = INVALID_REG;

View File

@ -188,6 +188,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
LI(X10, (int32_t)inst.constant);
@ -207,6 +208,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
}
#endif
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
@ -214,7 +216,9 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc, SCRATCH2);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10);
break;

View File

@ -67,6 +67,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
WriteDebugPC(startPC);
FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO);
LI(SCRATCH1, startPC);
QuickJ(R_RA, outerLoopPCInSCRATCH1_);
@ -118,6 +120,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
}
if (jo.enableBlocklink && jo.useBackJump) {
WriteDebugPC(startPC);
// Most blocks shouldn't be >= 4KB, so usually we can just BGE.
if (BInRange(blockStart)) {
BGE(DOWNCOUNTREG, R_ZERO, blockStart);
@ -218,7 +222,9 @@ void RiscVJitBackend::CompIR_Generic(IRInst inst) {
FlushAll();
LI(X10, value, SCRATCH2);
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
QuickCallFunction(&DoIRInst, SCRATCH2);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// We only need to check the return value if it's a potential exit.
@ -241,12 +247,14 @@ void RiscVJitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
LI(X10, MIPSGetName(op));
QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2);
}
LI(X10, (int32_t)inst.constant);
QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}
@ -329,6 +337,32 @@ void RiscVJitBackend::MovToPC(RiscVReg r) {
SW(r, CTXREG, offsetof(MIPSState, pc));
}
// Emits code that records the given (constant) MIPS PC into the profiler's
// PC slot for the sampling thread. No-op when the debug profiler slots
// weren't allocated. Clobbers SCRATCH2 and R_RA.
// Fix: dropped the unused `offset` local — this backend addresses the slot
// by its absolute host address, not base-relative like the ARM64 backend.
void RiscVJitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC) {
		LI(SCRATCH2, hooks_.profilerPC);
		LI(R_RA, (int32_t)pc);
		SW(R_RA, SCRATCH2, 0);
	}
}
// Emits code that stores the MIPS PC held in register r into the profiler's
// PC slot. No-op when the debug profiler slots weren't allocated.
// Clobbers SCRATCH2; r is only read.
// Fix: dropped the unused `offset` local — the store goes through the slot's
// absolute host address loaded into SCRATCH2.
void RiscVJitBackend::WriteDebugPC(RiscVReg r) {
	if (hooks_.profilerPC) {
		LI(SCRATCH2, hooks_.profilerPC);
		SW(r, SCRATCH2, 0);
	}
}
// Emits code that publishes the jit's current activity into the profiler's
// status slot. The guard checks profilerPC since both slots are allocated
// together. Clobbers SCRATCH2 and R_RA.
// Fix: dropped the unused `offset` local — the store uses the slot's
// absolute host address, so no base-relative offset is needed.
void RiscVJitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC) {
		LI(SCRATCH2, hooks_.profilerStatus);
		LI(R_RA, (int)status);
		SW(R_RA, SCRATCH2, 0);
	}
}
void RiscVJitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
QuickCallFunction(saveStaticRegisters_);

View File

@ -50,6 +50,9 @@ private:
void ApplyRoundingMode(bool force = false);
void MovFromPC(RiscVGen::RiscVReg r);
void MovToPC(RiscVGen::RiscVReg r);
void WriteDebugPC(uint32_t pc);
void WriteDebugPC(RiscVGen::RiscVReg r);
void WriteDebugProfilerStatus(IRProfilerStatus status);
void SaveStaticRegisters();
void LoadStaticRegisters();

View File

@ -49,8 +49,21 @@ static void ShowPC(void *membase, void *jitbase) {
}
void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
BeginWrite(GetMemoryProtectPageSize());
// This will be used as a writable scratch area, always 32-bit accessible.
const u8 *start = AlignCodePage();
if (DebugProfilerEnabled()) {
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
Write32(0);
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
Write32(0);
}
EmitFPUConstants();
EmitVecConstants();
const u8 *disasmStart = AlignCodePage();
BeginWrite(GetMemoryProtectPageSize());
jo.downcountInRegister = false;
#if PPSSPP_ARCH(AMD64)
@ -83,8 +96,6 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
if (jo.downcountInRegister)
MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
RET();
start = saveStaticRegisters_;
} else {
saveStaticRegisters_ = nullptr;
loadStaticRegisters_ = nullptr;
@ -146,14 +157,18 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));
LoadStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
outerLoopPCInSCRATCH1_ = GetCodePtr();
MovToPC(SCRATCH1);
outerLoop_ = GetCodePtr();
// Advance can change the downcount (or thread), so must save/restore around it.
SaveStaticRegisters();
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
LoadStaticRegisters();
@ -209,6 +224,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
}
MovFromPC(SCRATCH1);
WriteDebugPC(SCRATCH1);
#ifdef MASKED_PSP_MEMORY
AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
#endif
@ -247,7 +263,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
RestoreRoundingMode(true);
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
ABI_CallFunction(&MIPSComp::JitAt);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
ApplyRoundingMode(true);
// Let's just dispatch again, we'll enter the block since we know it's there.
JMP(dispatcherNoCheck_, true);
@ -265,6 +283,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
const uint8_t *quitLoop = GetCodePtr();
SetJumpTarget(badCoreState);
WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
SaveStaticRegisters();
RestoreRoundingMode(true);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
@ -283,16 +302,13 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
// Leave this at the end, add more stuff above.
if (enableDisasm) {
#if PPSSPP_ARCH(AMD64)
std::vector<std::string> lines = DisassembleX86(start, (int)(GetCodePtr() - start));
std::vector<std::string> lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart));
for (auto s : lines) {
INFO_LOG(JIT, "%s", s.c_str());
}
#endif
}
EmitFPUConstants();
EmitVecConstants();
// Let's spare the pre-generated code from unprotect-reprotect.
AlignCodePage();
jitStartOffset_ = (int)(GetCodePtr() - start);

View File

@ -972,6 +972,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
auto callFuncF_F = [&](const void *func) {
regs_.FlushBeforeCall();
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
#if X64JIT_USE_XMM_CALL
if (regs_.IsFPRMapped(inst.src1)) {
@ -1004,6 +1005,8 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
#endif
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
};
switch (inst.op) {

View File

@ -203,6 +203,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
#ifdef USE_PROFILER
// When profiling, we can't skip CallSyscall, since it times syscalls.
ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
@ -219,6 +220,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
}
#endif
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// This is always followed by an ExitToPC, where we check coreState.
break;
@ -226,7 +228,9 @@ void X64JitBackend::CompIR_System(IRInst inst) {
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
//SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX));
SUB(32, MDisp(CTXREG, downcountOffset), R(EAX));

View File

@ -64,6 +64,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
wroteCheckedOffset = true;
WriteDebugPC(startPC);
// TODO: See if we can get flags to always have the downcount compare.
if (jo.downcountInRegister) {
TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
@ -122,6 +124,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
}
if (jo.enableBlocklink && jo.useBackJump) {
WriteDebugPC(startPC);
if (jo.downcountInRegister) {
TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
} else {
@ -216,11 +220,13 @@ void X64JitBackend::CompIR_Generic(IRInst inst) {
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
#if PPSSPP_ARCH(AMD64)
ABI_CallFunctionP((const void *)&DoIRInst, (void *)value);
#else
ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32));
#endif
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
// We only need to check the return value if it's a potential exit.
@ -238,10 +244,12 @@ void X64JitBackend::CompIR_Interpret(IRInst inst) {
// IR protects us against this being a branching instruction (well, hopefully.)
FlushAll();
SaveStaticRegisters();
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
if (DebugStatsEnabled()) {
ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op));
}
ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant);
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
LoadStaticRegisters();
}
@ -346,6 +354,21 @@ void X64JitBackend::MovToPC(X64Reg r) {
MOV(32, MDisp(CTXREG, pcOffset), R(r));
}
// Emits a direct 32-bit store of the constant `pc` into the profiler's PC
// slot, for the sampling thread. Does nothing when the debug profiler
// slots weren't allocated.
void X64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC) {
		MOV(32, M(hooks_.profilerPC), Imm32(pc));
	}
}
// Emits a direct 32-bit store of the PC held in register r into the
// profiler's PC slot. Does nothing when the debug profiler slots weren't
// allocated; r is only read.
void X64JitBackend::WriteDebugPC(Gen::X64Reg r) {
	if (hooks_.profilerPC) {
		MOV(32, M(hooks_.profilerPC), R(r));
	}
}
// Emits a direct 32-bit store of `status` into the profiler's status slot.
// Guarded on profilerPC since both slots are set up together in
// GenerateFixedCode.
void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC) {
		MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status));
	}
}
void X64JitBackend::SaveStaticRegisters() {
if (jo.useStaticAlloc) {
//CALL(saveStaticRegisters_);

View File

@ -66,6 +66,9 @@ private:
void ApplyRoundingMode(bool force = false);
void MovFromPC(Gen::X64Reg r);
void MovToPC(Gen::X64Reg r);
void WriteDebugPC(uint32_t pc);
void WriteDebugPC(Gen::X64Reg r);
void WriteDebugProfilerStatus(IRProfilerStatus status);
void SaveStaticRegisters();
void LoadStaticRegisters();