mirror of
https://github.com/hrydgard/ppsspp.git
synced 2025-03-04 04:07:08 +00:00
IR: Add mini native jit MIPS block profiler.
This commit is contained in:
parent
06a1f0b72c
commit
9b2fa46861
@ -50,8 +50,18 @@ static void ShowPC(void *membase, void *jitbase) {
|
||||
}
|
||||
|
||||
void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
BeginWrite(GetMemoryProtectPageSize());
|
||||
// This will be used as a writable scratch area, always 32-bit accessible.
|
||||
const u8 *start = AlignCodePage();
|
||||
if (DebugProfilerEnabled()) {
|
||||
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
|
||||
Write32(0);
|
||||
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
|
||||
Write32(0);
|
||||
}
|
||||
|
||||
const u8 *disasmStart = AlignCodePage();
|
||||
BeginWrite(GetMemoryProtectPageSize());
|
||||
|
||||
if (jo.useStaticAlloc) {
|
||||
saveStaticRegisters_ = AlignCode16();
|
||||
@ -63,8 +73,6 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
regs_.EmitLoadStaticRegisters();
|
||||
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
|
||||
RET();
|
||||
|
||||
start = saveStaticRegisters_;
|
||||
} else {
|
||||
saveStaticRegisters_ = nullptr;
|
||||
loadStaticRegisters_ = nullptr;
|
||||
@ -152,13 +160,17 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
MOVI2R(JITBASEREG, (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE);
|
||||
|
||||
LoadStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
MovFromPC(SCRATCH1);
|
||||
WriteDebugPC(SCRATCH1);
|
||||
outerLoopPCInSCRATCH1_ = GetCodePtr();
|
||||
MovToPC(SCRATCH1);
|
||||
outerLoop_ = GetCodePtr();
|
||||
SaveStaticRegisters(); // Advance can change the downcount, so must save/restore
|
||||
RestoreRoundingMode(true);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
|
||||
QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
ApplyRoundingMode(true);
|
||||
LoadStaticRegisters();
|
||||
|
||||
@ -191,6 +203,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
}
|
||||
|
||||
MovFromPC(SCRATCH1);
|
||||
WriteDebugPC(SCRATCH1);
|
||||
#ifdef MASKED_PSP_MEMORY
|
||||
ANDI2R(SCRATCH1, SCRATCH1, Memory::MEMVIEW32_MASK);
|
||||
#endif
|
||||
@ -206,7 +219,9 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
|
||||
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
|
||||
RestoreRoundingMode(true);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
|
||||
QuickCallFunction(SCRATCH1_64, &MIPSComp::JitAt);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
ApplyRoundingMode(true);
|
||||
|
||||
// Let's just dispatch again, we'll enter the block since we know it's there.
|
||||
@ -221,6 +236,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
const uint8_t *quitLoop = GetCodePtr();
|
||||
SetJumpTarget(badCoreState);
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
|
||||
SaveStaticRegisters();
|
||||
RestoreRoundingMode(true);
|
||||
|
||||
@ -251,7 +267,7 @@ void Arm64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
|
||||
// Leave this at the end, add more stuff above.
|
||||
if (enableDisasm) {
|
||||
std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));
|
||||
std::vector<std::string> lines = DisassembleArm64(disasmStart, (int)(GetCodePtr() - disasmStart));
|
||||
for (auto s : lines) {
|
||||
INFO_LOG(JIT, "%s", s.c_str());
|
||||
}
|
||||
|
@ -508,6 +508,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
|
||||
|
||||
auto callFuncF_F = [&](float (*func)(float)) {
|
||||
regs_.FlushBeforeCall();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
|
||||
|
||||
// It might be in a non-volatile register.
|
||||
// TODO: May have to handle a transfer if SIMD here.
|
||||
if (regs_.IsFPRMapped(inst.src1)) {
|
||||
@ -527,6 +529,8 @@ void Arm64JitBackend::CompIR_FSpecial(IRInst inst) {
|
||||
if (regs_.F(inst.dest) != S0) {
|
||||
fp_.FMOV(regs_.F(inst.dest), S0);
|
||||
}
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
};
|
||||
|
||||
switch (inst.op) {
|
||||
|
@ -210,6 +210,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
|
||||
#ifdef USE_PROFILER
|
||||
// When profiling, we can't skip CallSyscall, since it times syscalls.
|
||||
MOVI2R(W0, inst.constant);
|
||||
@ -229,6 +230,7 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
|
||||
}
|
||||
#endif
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
// This is always followed by an ExitToPC, where we check coreState.
|
||||
break;
|
||||
@ -236,7 +238,9 @@ void Arm64JitBackend::CompIR_System(IRInst inst) {
|
||||
case IROp::CallReplacement:
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
|
||||
QuickCallFunction(SCRATCH2_64, GetReplacementFunc(inst.constant)->replaceFunc);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
SUB(DOWNCOUNTREG, DOWNCOUNTREG, W0);
|
||||
break;
|
||||
|
@ -76,6 +76,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
|
||||
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
|
||||
wroteCheckedOffset = true;
|
||||
|
||||
WriteDebugPC(startPC);
|
||||
|
||||
// Check the sign bit to check if negative.
|
||||
FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
|
||||
MOVI2R(SCRATCH1, startPC);
|
||||
@ -129,6 +131,8 @@ bool Arm64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
|
||||
}
|
||||
|
||||
if (jo.enableBlocklink && jo.useBackJump) {
|
||||
WriteDebugPC(startPC);
|
||||
|
||||
// Small blocks are common, check if it's < 32KB long.
|
||||
ptrdiff_t distance = blockStart - GetCodePointer();
|
||||
if (distance >= -0x8000 && distance < 0x8000) {
|
||||
@ -229,8 +233,10 @@ void Arm64JitBackend::CompIR_Generic(IRInst inst) {
|
||||
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
|
||||
MOVI2R(X0, value);
|
||||
QuickCallFunction(SCRATCH2_64, &DoIRInst);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
|
||||
// We only need to check the return value if it's a potential exit.
|
||||
@ -256,12 +262,14 @@ void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
|
||||
// IR protects us against this being a branching instruction (well, hopefully.)
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
|
||||
if (DebugStatsEnabled()) {
|
||||
MOVP2R(X0, MIPSGetName(op));
|
||||
QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
|
||||
}
|
||||
MOVI2R(X0, inst.constant);
|
||||
QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
}
|
||||
|
||||
@ -354,6 +362,32 @@ void Arm64JitBackend::MovToPC(ARM64Reg r) {
|
||||
STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
|
||||
}
|
||||
|
||||
void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
|
||||
if (hooks_.profilerPC) {
|
||||
int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
|
||||
MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
|
||||
MOVI2R(SCRATCH1, pc);
|
||||
STR(SCRATCH1, JITBASEREG, SCRATCH2);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that stores the value of register r into the profiler's PC slot.
// Nothing is emitted when no profiler slot was allocated (hooks_.profilerPC is null).
// The emitted code overwrites SCRATCH2 (with the slot offset); r is only read.
void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
	if (!hooks_.profilerPC)
		return;

	// JITBASEREG is loaded with GetBasePtr() - MIPS_EMUHACK_OPCODE in GenerateFixedCode,
	// so the same bias is added back to the slot's offset from the base pointer.
	const int slotOffset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
	MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + slotOffset);
	STR(r, JITBASEREG, SCRATCH2);
}
|
||||
|
||||
// Emits code that stores the given status into the profiler's status slot.
// Nothing is emitted when no status slot was allocated.
// The emitted code overwrites SCRATCH1 (with the status) and SCRATCH2 (with the slot offset).
void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	// Guard on the pointer that is actually used to compute the store address.
	// GenerateFixedCode sets profilerPC and profilerStatus together, so this is
	// equivalent there, but it can never emit a store relative to a null slot.
	if (hooks_.profilerStatus) {
		// JITBASEREG is loaded with GetBasePtr() - MIPS_EMUHACK_OPCODE in GenerateFixedCode,
		// so the same bias is added back to the slot's offset from the base pointer.
		int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		MOVI2R(SCRATCH1, (int)status);
		STR(SCRATCH1, JITBASEREG, SCRATCH2);
	}
}
|
||||
|
||||
void Arm64JitBackend::SaveStaticRegisters() {
|
||||
if (jo.useStaticAlloc) {
|
||||
QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);
|
||||
|
@ -57,6 +57,11 @@ private:
|
||||
void UpdateRoundingMode(bool force = false);
|
||||
void MovFromPC(Arm64Gen::ARM64Reg r);
|
||||
void MovToPC(Arm64Gen::ARM64Reg r);
|
||||
// Destroys SCRATCH2 (the uint32_t overload also overwrites SCRATCH1).
|
||||
void WriteDebugPC(uint32_t pc);
|
||||
void WriteDebugPC(Arm64Gen::ARM64Reg r);
|
||||
// Destroys SCRATCH1 and SCRATCH2.
|
||||
void WriteDebugProfilerStatus(IRProfilerStatus status);
|
||||
|
||||
void SaveStaticRegisters();
|
||||
void LoadStaticRegisters();
|
||||
|
@ -15,7 +15,9 @@
|
||||
// Official git repository and contact information can be found at
|
||||
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
||||
|
||||
#include <atomic>
|
||||
#include <climits>
|
||||
#include <thread>
|
||||
#include "Common/Profiler/Profiler.h"
|
||||
#include "Common/StringUtils.h"
|
||||
#include "Common/TimeUtil.h"
|
||||
@ -31,18 +33,57 @@ namespace MIPSComp {
|
||||
|
||||
// Compile time flag to enable debug stats for not compiled ops.
|
||||
static constexpr bool enableDebugStats = false;
|
||||
// Compile time flag for enabling the simple IR jit profiler.
|
||||
static constexpr bool enableDebugProfiler = false;
|
||||
|
||||
// Used only for debugging when enableDebugStats or enableDebugProfiler is true above.
|
||||
static std::map<uint8_t, int> debugSeenNotCompiledIR;
|
||||
static std::map<const char *, int> debugSeenNotCompiled;
|
||||
static std::map<std::pair<uint32_t, IRProfilerStatus>, int> debugSeenPCUsage;
|
||||
static double lastDebugStatsLog = 0.0;
|
||||
static constexpr double debugStatsFrequency = 5.0;
|
||||
|
||||
static std::thread debugProfilerThread;
|
||||
std::atomic<bool> debugProfilerThreadStatus = false;
|
||||
|
||||
template <int N>
|
||||
class IRProfilerTopValues {
|
||||
public:
|
||||
void Add(const std::pair<uint32_t, IRProfilerStatus> &v, int c) {
|
||||
for (int i = 0; i < N; ++i) {
|
||||
if (c > counts[i]) {
|
||||
counts[i] = c;
|
||||
values[i] = v;
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
int counts[N]{};
|
||||
std::pair<uint32_t, IRProfilerStatus> values[N]{};
|
||||
};
|
||||
|
||||
// Returns the enumerator name of a profiler status for use in log output.
// Any value that is not a known enumerator yields "INVALID".
const char *IRProfilerStatusToString(IRProfilerStatus s) {
	const char *name = "INVALID";
	switch (s) {
	case IRProfilerStatus::NOT_RUNNING:
		name = "NOT_RUNNING";
		break;
	case IRProfilerStatus::IN_JIT:
		name = "IN_JIT";
		break;
	case IRProfilerStatus::TIMER_ADVANCE:
		name = "TIMER_ADVANCE";
		break;
	case IRProfilerStatus::COMPILING:
		name = "COMPILING";
		break;
	case IRProfilerStatus::MATH_HELPER:
		name = "MATH_HELPER";
		break;
	case IRProfilerStatus::REPLACEMENT:
		name = "REPLACEMENT";
		break;
	case IRProfilerStatus::SYSCALL:
		name = "SYSCALL";
		break;
	case IRProfilerStatus::INTERPRET:
		name = "INTERPRET";
		break;
	case IRProfilerStatus::IR_INTERPRET:
		name = "IR_INTERPRET";
		break;
	}
	return name;
}
|
||||
|
||||
static void LogDebugStats() {
|
||||
if (!enableDebugStats)
|
||||
if (!enableDebugStats && !enableDebugProfiler)
|
||||
return;
|
||||
|
||||
double now = time_now_d();
|
||||
if (now < lastDebugStatsLog + 1.0)
|
||||
if (now < lastDebugStatsLog + debugStatsFrequency)
|
||||
return;
|
||||
lastDebugStatsLog = now;
|
||||
|
||||
@ -66,16 +107,36 @@ static void LogDebugStats() {
|
||||
}
|
||||
debugSeenNotCompiled.clear();
|
||||
|
||||
IRProfilerTopValues<4> slowestPCs;
|
||||
int64_t totalCount = 0;
|
||||
for (auto it : debugSeenPCUsage) {
|
||||
slowestPCs.Add(it.first, it.second);
|
||||
totalCount += it.second;
|
||||
}
|
||||
debugSeenPCUsage.clear();
|
||||
|
||||
if (worstIROp != -1)
|
||||
WARN_LOG(JIT, "Most not compiled IR op: %s (%d)", GetIRMeta((IROp)worstIROp)->name, worstIRVal);
|
||||
if (worstName != nullptr)
|
||||
WARN_LOG(JIT, "Most not compiled op: %s (%d)", worstName, worstVal);
|
||||
if (slowestPCs.counts[0] != 0) {
|
||||
for (int i = 0; i < 4; ++i) {
|
||||
uint32_t pc = slowestPCs.values[i].first;
|
||||
const char *status = IRProfilerStatusToString(slowestPCs.values[i].second);
|
||||
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(pc) : "";
|
||||
WARN_LOG(JIT, "Slowest sampled PC #%d: %08x (%s)/%s (%f%%)", i, pc, label.c_str(), status, 100.0 * (double)slowestPCs.counts[i] / (double)totalCount);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Reports the value of the compile-time enableDebugStats flag, so backends
// can decide whether to emit the not-compiled-op statistics calls.
bool IRNativeBackend::DebugStatsEnabled() const {
|
||||
return enableDebugStats;
|
||||
}
|
||||
|
||||
// Reports the value of the compile-time enableDebugProfiler flag. Backends
// check this in GenerateFixedCode before allocating the profiler PC/status slots.
bool IRNativeBackend::DebugProfilerEnabled() const {
|
||||
return enableDebugProfiler;
|
||||
}
|
||||
|
||||
void IRNativeBackend::NotifyMIPSInterpret(const char *name) {
|
||||
_assert_(enableDebugStats);
|
||||
debugSeenNotCompiled[name]++;
|
||||
@ -120,6 +181,13 @@ int IRNativeBackend::ReportBadAddress(uint32_t addr, uint32_t alignment, uint32_
|
||||
|
||||
IRNativeBackend::IRNativeBackend(IRBlockCache &blocks) : blocks_(blocks) {}
|
||||
|
||||
// Stops the sampling profiler thread, if one was started, and waits for it to exit.
IRNativeBackend::~IRNativeBackend() {
	if (debugProfilerThreadStatus) {
		// The sampler loops while this flag is set, so clearing it lets it finish.
		debugProfilerThreadStatus = false;
		// The flag is raised before the thread object is assigned, so it can be
		// set without a running thread if thread creation failed. join() on a
		// non-joinable std::thread throws, which must not happen in a destructor.
		if (debugProfilerThread.joinable())
			debugProfilerThread.join();
	}
}
|
||||
|
||||
void IRNativeBackend::CompileIRInst(IRInst inst) {
|
||||
switch (inst.op) {
|
||||
case IROp::Nop:
|
||||
@ -421,6 +489,20 @@ void IRNativeJit::Init(IRNativeBackend &backend) {
|
||||
|
||||
// Wanted this to be a reference, but vtbls get in the way. Shouldn't change.
|
||||
hooks_ = backend.GetNativeHooks();
|
||||
|
||||
if (enableDebugProfiler && hooks_.profilerPC) {
|
||||
debugProfilerThreadStatus = true;
|
||||
debugProfilerThread = std::thread([&] {
|
||||
// Spin, spin spin... maybe could at least hook into sleeps.
|
||||
while (debugProfilerThreadStatus) {
|
||||
IRProfilerStatus stat = *hooks_.profilerStatus;
|
||||
uint32_t pc = *hooks_.profilerPC;
|
||||
if (stat != IRProfilerStatus::NOT_RUNNING && stat != IRProfilerStatus::SYSCALL) {
|
||||
debugSeenPCUsage[std::make_pair(pc, stat)]++;
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
bool IRNativeJit::CompileTargetBlock(IRBlock *block, int block_num, bool preload) {
|
||||
@ -432,7 +514,7 @@ void IRNativeJit::FinalizeTargetBlock(IRBlock *block, int block_num) {
|
||||
}
|
||||
|
||||
void IRNativeJit::RunLoopUntil(u64 globalticks) {
|
||||
if constexpr (enableDebugStats) {
|
||||
if constexpr (enableDebugStats || enableDebugProfiler) {
|
||||
LogDebugStats();
|
||||
}
|
||||
|
||||
|
@ -25,12 +25,27 @@ namespace MIPSComp {
|
||||
|
||||
typedef void (*IRNativeFuncNoArg)();
|
||||
|
||||
// What the JIT thread is currently doing, as published by generated code to the
// profiler status slot and sampled by the debug profiler thread.
enum class IRProfilerStatus : int32_t {
|
||||
// Written when the dispatcher loop is left; samples in this state are not counted.
NOT_RUNNING,
|
||||
// Written on entering the dispatcher and again after each helper call returns.
IN_JIT,
|
||||
// Set around the call to CoreTiming::Advance.
TIMER_ADVANCE,
|
||||
// Set around the call to MIPSComp::JitAt.
COMPILING,
|
||||
// Set around float helper calls made from CompIR_FSpecial.
MATH_HELPER,
|
||||
// Set around the call to a replacement function.
REPLACEMENT,
|
||||
// Set while a syscall is being handled; samples in this state are not counted.
SYSCALL,
|
||||
// Set around the call to the function returned by MIPSGetInterpretFunc.
INTERPRET,
|
||||
// Set around the call to DoIRInst.
IR_INTERPRET,
|
||||
};
|
||||
|
||||
// Pointers into a backend's generated fixed code, handed to IRNativeJit via GetNativeHooks().
struct IRNativeHooks {
|
||||
IRNativeFuncNoArg enterDispatcher = nullptr;
|
||||
|
||||
const uint8_t *dispatcher = nullptr;
|
||||
const uint8_t *dispatchFetch = nullptr;
|
||||
const uint8_t *crashHandler = nullptr;
|
||||
|
||||
// Slot in the writable scratch area at the start of the code space that generated
// code updates with the current MIPS PC. Null unless the debug profiler is enabled.
uint32_t *profilerPC = nullptr;
|
||||
// Slot directly after profilerPC that generated code updates with the current
// IRProfilerStatus. Null unless the debug profiler is enabled.
IRProfilerStatus *profilerStatus = nullptr;
|
||||
};
|
||||
|
||||
struct IRNativeBlockExit {
|
||||
@ -47,7 +62,7 @@ struct IRNativeBlock {
|
||||
class IRNativeBackend {
|
||||
public:
|
||||
IRNativeBackend(IRBlockCache &blocks);
|
||||
virtual ~IRNativeBackend() {}
|
||||
virtual ~IRNativeBackend();
|
||||
|
||||
void CompileIRInst(IRInst inst);
|
||||
|
||||
@ -120,6 +135,7 @@ protected:
|
||||
|
||||
// Returns true when debugging statistics should be compiled in.
|
||||
bool DebugStatsEnabled() const;
|
||||
bool DebugProfilerEnabled() const;
|
||||
|
||||
// Callback (compile when DebugStatsEnabled()) to log a base interpreter hit.
|
||||
// Call the func returned by MIPSGetInterpretFunc(op) directly for interpret.
|
||||
|
@ -45,8 +45,19 @@ static void ShowPC(u32 downcount, void *membase, void *jitbase) {
|
||||
}
|
||||
|
||||
void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
BeginWrite(GetMemoryProtectPageSize());
|
||||
// This will be used as a writable scratch area, always 32-bit accessible.
|
||||
const u8 *start = AlignCodePage();
|
||||
if (DebugProfilerEnabled()) {
|
||||
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
|
||||
*hooks_.profilerPC = 0;
|
||||
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr() + 1;
|
||||
*hooks_.profilerStatus = IRProfilerStatus::NOT_RUNNING;
|
||||
SetCodePointer(GetCodePtr() + sizeof(uint32_t) * 2, GetWritableCodePtr() + sizeof(uint32_t) * 2);
|
||||
}
|
||||
|
||||
const u8 *disasmStart = AlignCodePage();
|
||||
BeginWrite(GetMemoryProtectPageSize());
|
||||
|
||||
if (jo.useStaticAlloc) {
|
||||
saveStaticRegisters_ = AlignCode16();
|
||||
@ -58,8 +69,6 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
regs_.EmitLoadStaticRegisters();
|
||||
LW(DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
|
||||
RET();
|
||||
|
||||
start = saveStaticRegisters_;
|
||||
} else {
|
||||
saveStaticRegisters_ = nullptr;
|
||||
loadStaticRegisters_ = nullptr;
|
||||
@ -124,14 +133,18 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
LI(JITBASEREG, GetBasePtr() - MIPS_EMUHACK_OPCODE, SCRATCH1);
|
||||
|
||||
LoadStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
MovFromPC(SCRATCH1);
|
||||
WriteDebugPC(SCRATCH1);
|
||||
outerLoopPCInSCRATCH1_ = GetCodePtr();
|
||||
MovToPC(SCRATCH1);
|
||||
outerLoop_ = GetCodePtr();
|
||||
// Advance can change the downcount (or thread), so must save/restore around it.
|
||||
SaveStaticRegisters();
|
||||
RestoreRoundingMode(true);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
|
||||
QuickCallFunction(&CoreTiming::Advance, X7);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
ApplyRoundingMode(true);
|
||||
LoadStaticRegisters();
|
||||
|
||||
@ -162,6 +175,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
}
|
||||
|
||||
LWU(SCRATCH1, CTXREG, offsetof(MIPSState, pc));
|
||||
WriteDebugPC(SCRATCH1);
|
||||
#ifdef MASKED_PSP_MEMORY
|
||||
LI(SCRATCH2, 0x3FFFFFFF);
|
||||
AND(SCRATCH1, SCRATCH1, SCRATCH2);
|
||||
@ -180,7 +194,9 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
|
||||
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
|
||||
RestoreRoundingMode(true);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
|
||||
QuickCallFunction(&MIPSComp::JitAt, X7);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
ApplyRoundingMode(true);
|
||||
|
||||
// Try again, the block index should be set now.
|
||||
@ -195,6 +211,7 @@ void RiscVJitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
const uint8_t *quitLoop = GetCodePtr();
|
||||
SetJumpTarget(badCoreState);
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
|
||||
SaveStaticRegisters();
|
||||
RestoreRoundingMode(true);
|
||||
|
||||
|
@ -585,6 +585,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
|
||||
|
||||
auto callFuncF_F = [&](float (*func)(float)) {
|
||||
regs_.FlushBeforeCall();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
|
||||
|
||||
// It might be in a non-volatile register.
|
||||
// TODO: May have to handle a transfer if SIMD here.
|
||||
if (regs_.IsFPRMapped(inst.src1)) {
|
||||
@ -600,6 +602,8 @@ void RiscVJitBackend::CompIR_FSpecial(IRInst inst) {
|
||||
if (regs_.F(inst.dest) != F10) {
|
||||
FMV(32, regs_.F(inst.dest), F10);
|
||||
}
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
};
|
||||
|
||||
RiscVReg tempReg = INVALID_REG;
|
||||
|
@ -188,6 +188,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
|
||||
#ifdef USE_PROFILER
|
||||
// When profiling, we can't skip CallSyscall, since it times syscalls.
|
||||
LI(X10, (int32_t)inst.constant);
|
||||
@ -207,6 +208,7 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
|
||||
}
|
||||
#endif
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
// This is always followed by an ExitToPC, where we check coreState.
|
||||
break;
|
||||
@ -214,7 +216,9 @@ void RiscVJitBackend::CompIR_System(IRInst inst) {
|
||||
case IROp::CallReplacement:
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
|
||||
QuickCallFunction(GetReplacementFunc(inst.constant)->replaceFunc, SCRATCH2);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
SUB(DOWNCOUNTREG, DOWNCOUNTREG, X10);
|
||||
break;
|
||||
|
@ -67,6 +67,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
|
||||
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
|
||||
wroteCheckedOffset = true;
|
||||
|
||||
WriteDebugPC(startPC);
|
||||
|
||||
FixupBranch normalEntry = BGE(DOWNCOUNTREG, R_ZERO);
|
||||
LI(SCRATCH1, startPC);
|
||||
QuickJ(R_RA, outerLoopPCInSCRATCH1_);
|
||||
@ -118,6 +120,8 @@ bool RiscVJitBackend::CompileBlock(IRBlock *block, int block_num, bool preload)
|
||||
}
|
||||
|
||||
if (jo.enableBlocklink && jo.useBackJump) {
|
||||
WriteDebugPC(startPC);
|
||||
|
||||
// Most blocks shouldn't be >= 4KB, so usually we can just BGE.
|
||||
if (BInRange(blockStart)) {
|
||||
BGE(DOWNCOUNTREG, R_ZERO, blockStart);
|
||||
@ -218,7 +222,9 @@ void RiscVJitBackend::CompIR_Generic(IRInst inst) {
|
||||
FlushAll();
|
||||
LI(X10, value, SCRATCH2);
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
|
||||
QuickCallFunction(&DoIRInst, SCRATCH2);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
|
||||
// We only need to check the return value if it's a potential exit.
|
||||
@ -241,12 +247,14 @@ void RiscVJitBackend::CompIR_Interpret(IRInst inst) {
|
||||
// IR protects us against this being a branching instruction (well, hopefully.)
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
|
||||
if (DebugStatsEnabled()) {
|
||||
LI(X10, MIPSGetName(op));
|
||||
QuickCallFunction(&NotifyMIPSInterpret, SCRATCH2);
|
||||
}
|
||||
LI(X10, (int32_t)inst.constant);
|
||||
QuickCallFunction((const u8 *)MIPSGetInterpretFunc(op), SCRATCH2);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
}
|
||||
|
||||
@ -329,6 +337,32 @@ void RiscVJitBackend::MovToPC(RiscVReg r) {
|
||||
SW(r, CTXREG, offsetof(MIPSState, pc));
|
||||
}
|
||||
|
||||
void RiscVJitBackend::WriteDebugPC(uint32_t pc) {
|
||||
if (hooks_.profilerPC) {
|
||||
int offset = (const u8 *)hooks_.profilerPC - GetBasePtr();
|
||||
LI(SCRATCH2, hooks_.profilerPC);
|
||||
LI(R_RA, (int32_t)pc);
|
||||
SW(R_RA, SCRATCH2, 0);
|
||||
}
|
||||
}
|
||||
|
||||
// Emits code that stores the value of register r into the profiler's PC slot.
// Nothing is emitted when no profiler slot was allocated (hooks_.profilerPC is null).
// The emitted code overwrites SCRATCH2 (with the slot address); r is only read.
void RiscVJitBackend::WriteDebugPC(RiscVReg r) {
	if (hooks_.profilerPC) {
		// The slot is addressed by absolute pointer, so no base-relative offset is needed.
		LI(SCRATCH2, hooks_.profilerPC);
		SW(r, SCRATCH2, 0);
	}
}
|
||||
|
||||
// Emits code that stores the given status into the profiler's status slot.
// Nothing is emitted when no status slot was allocated.
// The emitted code overwrites SCRATCH2 (with the slot address) and R_RA (with the status).
void RiscVJitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	// Guard on the pointer that is actually loaded as the store address.
	// GenerateFixedCode sets profilerPC and profilerStatus together, so this is
	// equivalent there, but it can never emit a store through a null slot.
	if (hooks_.profilerStatus) {
		// The slot is addressed by absolute pointer, so no base-relative offset is needed.
		LI(SCRATCH2, hooks_.profilerStatus);
		LI(R_RA, (int)status);
		SW(R_RA, SCRATCH2, 0);
	}
}
|
||||
|
||||
void RiscVJitBackend::SaveStaticRegisters() {
|
||||
if (jo.useStaticAlloc) {
|
||||
QuickCallFunction(saveStaticRegisters_);
|
||||
|
@ -50,6 +50,9 @@ private:
|
||||
void ApplyRoundingMode(bool force = false);
|
||||
void MovFromPC(RiscVGen::RiscVReg r);
|
||||
void MovToPC(RiscVGen::RiscVReg r);
|
||||
void WriteDebugPC(uint32_t pc);
|
||||
void WriteDebugPC(RiscVGen::RiscVReg r);
|
||||
void WriteDebugProfilerStatus(IRProfilerStatus status);
|
||||
|
||||
void SaveStaticRegisters();
|
||||
void LoadStaticRegisters();
|
||||
|
@ -49,8 +49,21 @@ static void ShowPC(void *membase, void *jitbase) {
|
||||
}
|
||||
|
||||
void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
BeginWrite(GetMemoryProtectPageSize());
|
||||
// This will be used as a writable scratch area, always 32-bit accessible.
|
||||
const u8 *start = AlignCodePage();
|
||||
if (DebugProfilerEnabled()) {
|
||||
ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);
|
||||
hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();
|
||||
Write32(0);
|
||||
hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();
|
||||
Write32(0);
|
||||
}
|
||||
|
||||
EmitFPUConstants();
|
||||
EmitVecConstants();
|
||||
|
||||
const u8 *disasmStart = AlignCodePage();
|
||||
BeginWrite(GetMemoryProtectPageSize());
|
||||
|
||||
jo.downcountInRegister = false;
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
@ -83,8 +96,6 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
if (jo.downcountInRegister)
|
||||
MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
|
||||
RET();
|
||||
|
||||
start = saveStaticRegisters_;
|
||||
} else {
|
||||
saveStaticRegisters_ = nullptr;
|
||||
loadStaticRegisters_ = nullptr;
|
||||
@ -146,14 +157,18 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));
|
||||
|
||||
LoadStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
MovFromPC(SCRATCH1);
|
||||
WriteDebugPC(SCRATCH1);
|
||||
outerLoopPCInSCRATCH1_ = GetCodePtr();
|
||||
MovToPC(SCRATCH1);
|
||||
outerLoop_ = GetCodePtr();
|
||||
// Advance can change the downcount (or thread), so must save/restore around it.
|
||||
SaveStaticRegisters();
|
||||
RestoreRoundingMode(true);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);
|
||||
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
ApplyRoundingMode(true);
|
||||
LoadStaticRegisters();
|
||||
|
||||
@ -209,6 +224,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
}
|
||||
|
||||
MovFromPC(SCRATCH1);
|
||||
WriteDebugPC(SCRATCH1);
|
||||
#ifdef MASKED_PSP_MEMORY
|
||||
AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));
|
||||
#endif
|
||||
@ -247,7 +263,9 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
|
||||
// No block found, let's jit. We don't need to save static regs, they're all callee saved.
|
||||
RestoreRoundingMode(true);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);
|
||||
ABI_CallFunction(&MIPSComp::JitAt);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
ApplyRoundingMode(true);
|
||||
// Let's just dispatch again, we'll enter the block since we know it's there.
|
||||
JMP(dispatcherNoCheck_, true);
|
||||
@ -265,6 +283,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
const uint8_t *quitLoop = GetCodePtr();
|
||||
SetJumpTarget(badCoreState);
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);
|
||||
SaveStaticRegisters();
|
||||
RestoreRoundingMode(true);
|
||||
ABI_PopAllCalleeSavedRegsAndAdjustStack();
|
||||
@ -283,16 +302,13 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
// Leave this at the end, add more stuff above.
|
||||
if (enableDisasm) {
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
std::vector<std::string> lines = DisassembleX86(start, (int)(GetCodePtr() - start));
|
||||
std::vector<std::string> lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart));
|
||||
for (auto s : lines) {
|
||||
INFO_LOG(JIT, "%s", s.c_str());
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
EmitFPUConstants();
|
||||
EmitVecConstants();
|
||||
|
||||
// Let's spare the pre-generated code from unprotect-reprotect.
|
||||
AlignCodePage();
|
||||
jitStartOffset_ = (int)(GetCodePtr() - start);
|
||||
|
@ -972,6 +972,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
|
||||
|
||||
auto callFuncF_F = [&](const void *func) {
|
||||
regs_.FlushBeforeCall();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::MATH_HELPER);
|
||||
|
||||
#if X64JIT_USE_XMM_CALL
|
||||
if (regs_.IsFPRMapped(inst.src1)) {
|
||||
@ -1004,6 +1005,8 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
|
||||
regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
|
||||
MOVD_xmm(regs_.FX(inst.dest), R(SCRATCH1));
|
||||
#endif
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
};
|
||||
|
||||
switch (inst.op) {
|
||||
|
@ -203,6 +203,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::SYSCALL);
|
||||
#ifdef USE_PROFILER
|
||||
// When profiling, we can't skip CallSyscall, since it times syscalls.
|
||||
ABI_CallFunctionC((const u8 *)&CallSyscall, inst.constant);
|
||||
@ -219,6 +220,7 @@ void X64JitBackend::CompIR_System(IRInst inst) {
|
||||
}
|
||||
#endif
|
||||
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
// This is always followed by an ExitToPC, where we check coreState.
|
||||
break;
|
||||
@ -226,7 +228,9 @@ void X64JitBackend::CompIR_System(IRInst inst) {
|
||||
case IROp::CallReplacement:
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::REPLACEMENT);
|
||||
ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
//SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX));
|
||||
SUB(32, MDisp(CTXREG, downcountOffset), R(EAX));
|
||||
|
@ -64,6 +64,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
|
||||
SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
|
||||
wroteCheckedOffset = true;
|
||||
|
||||
WriteDebugPC(startPC);
|
||||
|
||||
// TODO: See if we can get flags to always have the downcount compare.
|
||||
if (jo.downcountInRegister) {
|
||||
TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
|
||||
@ -122,6 +124,8 @@ bool X64JitBackend::CompileBlock(IRBlock *block, int block_num, bool preload) {
|
||||
}
|
||||
|
||||
if (jo.enableBlocklink && jo.useBackJump) {
|
||||
WriteDebugPC(startPC);
|
||||
|
||||
if (jo.downcountInRegister) {
|
||||
TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
|
||||
} else {
|
||||
@ -216,11 +220,13 @@ void X64JitBackend::CompIR_Generic(IRInst inst) {
|
||||
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
|
||||
#if PPSSPP_ARCH(AMD64)
|
||||
ABI_CallFunctionP((const void *)&DoIRInst, (void *)value);
|
||||
#else
|
||||
ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32));
|
||||
#endif
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
|
||||
// We only need to check the return value if it's a potential exit.
|
||||
@ -238,10 +244,12 @@ void X64JitBackend::CompIR_Interpret(IRInst inst) {
|
||||
// IR protects us against this being a branching instruction (well, hopefully.)
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
|
||||
if (DebugStatsEnabled()) {
|
||||
ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op));
|
||||
}
|
||||
ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant);
|
||||
WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
|
||||
LoadStaticRegisters();
|
||||
}
|
||||
|
||||
@ -346,6 +354,21 @@ void X64JitBackend::MovToPC(X64Reg r) {
|
||||
MOV(32, MDisp(CTXREG, pcOffset), R(r));
|
||||
}
|
||||
|
||||
void X64JitBackend::WriteDebugPC(uint32_t pc) {
|
||||
if (hooks_.profilerPC)
|
||||
MOV(32, M(hooks_.profilerPC), Imm32(pc));
|
||||
}
|
||||
|
||||
// Emits a 32-bit store of register r into the profiler's PC slot.
// Nothing is emitted when no profiler slot was allocated (hooks_.profilerPC is null).
void X64JitBackend::WriteDebugPC(Gen::X64Reg r) {
	if (!hooks_.profilerPC) {
		return;
	}
	MOV(32, M(hooks_.profilerPC), R(r));
}
|
||||
|
||||
// Emits a 32-bit store of the given status into the profiler's status slot.
// Nothing is emitted when no status slot was allocated.
void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	// Guard on the pointer that is actually used as the store address.
	// GenerateFixedCode sets profilerPC and profilerStatus together, so this is
	// equivalent there, but it can never emit a store through a null slot.
	if (hooks_.profilerStatus)
		MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status));
}
|
||||
|
||||
void X64JitBackend::SaveStaticRegisters() {
|
||||
if (jo.useStaticAlloc) {
|
||||
//CALL(saveStaticRegisters_);
|
||||
|
@ -66,6 +66,9 @@ private:
|
||||
void ApplyRoundingMode(bool force = false);
|
||||
void MovFromPC(Gen::X64Reg r);
|
||||
void MovToPC(Gen::X64Reg r);
|
||||
void WriteDebugPC(uint32_t pc);
|
||||
void WriteDebugPC(Gen::X64Reg r);
|
||||
void WriteDebugProfilerStatus(IRProfilerStatus status);
|
||||
|
||||
void SaveStaticRegisters();
|
||||
void LoadStaticRegisters();
|
||||
|
Loading…
x
Reference in New Issue
Block a user