Merge pull request #17993 from unknownbrackets/x86-jit-minor

x86jit: Replacements, expose for profiling better
This commit is contained in:
Henrik Rydgård 2023-08-28 10:23:00 +02:00 committed by GitHub
commit dddf63d057
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 34 additions and 337 deletions

View File

@ -18,6 +18,7 @@
#include "Common/Profiler/Profiler.h"
#include "Common/StringUtils.h"
#include "Common/TimeUtil.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRNativeCommon.h"
@ -451,8 +452,9 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
break;
}
// Used by profiling tools that don't like spaces.
if (block_num == -1) {
name = "(unknown or deleted block)";
name = "unknownOrDeletedBlock";
return true;
}
@ -460,7 +462,13 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
if (block) {
u32 start = 0, size = 0;
block->GetRange(start, size);
name = StringFromFormat("(block %d at %08x)", block_num, start);
// It helps to know which func this block is inside.
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(start) : "";
if (!label.empty())
name = StringFromFormat("block%d_%08x_%s", block_num, start, label.c_str());
else
name = StringFromFormat("block%d_%08x", block_num, start);
return true;
}
return false;

View File

@ -236,7 +236,7 @@ void MIPSState::UpdateCore(CPUCore desired) {
switch (PSP_CoreParameter().cpuCore) {
case CPUCore::JIT:
case CPUCore::JIT_IR:
INFO_LOG(CPU, "Switching to JIT");
INFO_LOG(CPU, "Switching to JIT%s", PSP_CoreParameter().cpuCore == CPUCore::JIT_IR ? " IR" : "");
if (oldjit) {
std::lock_guard<std::recursive_mutex> guard(MIPSComp::jitLock);
MIPSComp::jit = nullptr;
@ -246,7 +246,7 @@ void MIPSState::UpdateCore(CPUCore desired) {
break;
case CPUCore::IR_INTERPRETER:
INFO_LOG(CPU, "Switching to IRJIT");
INFO_LOG(CPU, "Switching to IR interpreter");
if (oldjit) {
std::lock_guard<std::recursive_mutex> guard(MIPSComp::jitLock);
MIPSComp::jit = nullptr;

View File

@ -252,8 +252,11 @@ void RiscVJitBackend::FlushAll() {
bool RiscVJitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
// Used in disassembly viewer.
// Don't use spaces; profilers get confused or truncate them.
if (ptr == dispatcherPCInSCRATCH1_) {
name = "dispatcher (PC in SCRATCH1)";
name = "dispatcherPCInSCRATCH1";
} else if (ptr == outerLoopPCInSCRATCH1_) {
name = "outerLoopPCInSCRATCH1";
} else if (ptr == dispatcherNoCheck_) {
name = "dispatcherNoCheck";
} else if (ptr == saveStaticRegisters_) {
@ -262,6 +265,8 @@ bool RiscVJitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
name = "loadStaticRegisters";
} else if (ptr == applyRoundingMode_) {
name = "applyRoundingMode";
} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
name = "fixedCode";
} else {
return IRNativeBackend::DescribeCodePtr(ptr, name);
}

View File

@ -1,299 +0,0 @@
#include "ppsspp_config.h"
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
#include "Core/MIPS/x86/IRToX86.h"
namespace MIPSComp {
// Initial attempt at converting IR directly to x86.
// This is intended to be an easy way to benefit from the IR with the current infrastructure.
// Later tries may go across multiple blocks and a different representation.
// Operand locations chosen for one IR instruction's general purpose registers.
// Each field is an x86 emitter operand (Gen::OpArg) for the matching IR operand slot.
struct GPRMapping {
// Where the instruction's result goes.
Gen::OpArg dest;
// First source operand.
Gen::OpArg src1;
// Second source operand.
Gen::OpArg src2;
};
// Operand locations chosen for one IR instruction's floating point / vector registers.
// Same layout as GPRMapping, kept as a separate type.
struct FPRMapping {
// Where the instruction's result goes.
Gen::OpArg dest;
// First source operand.
Gen::OpArg src1;
// Second source operand.
Gen::OpArg src2;
};
// Register allocator for the GPR side of IR instructions.
// Currently stateless: there are no data members yet.
class GreedyRegallocGPR {
public:
// Produces the dest/src1/src2 operands to use for the given instruction.
GPRMapping Map(IRInst inst, const IRMeta &meta);
private:
};
// Builds the GPR operand mapping for a single IR instruction.
// Still a stub: the mapping is returned exactly as default-constructed, and neither
// inst nor meta influences the result yet.
GPRMapping GreedyRegallocGPR::Map(IRInst inst, const IRMeta &meta) {
	GPRMapping result;

	// NOTE(review): 'G' presumably tags a GPR operand in the first type slot - confirm
	// against the IRMeta definition.
	const bool firstOperandIsG = meta.types[0] == 'G';
	if (firstOperandIsG) {
		// Nothing is allocated here yet.
	}

	// etc..
	return result;
}
// Every 4 registers can also be mapped into an SSE register.
// When changing from single to vec4 mapping, we'll just flush, for now.
// Register allocator for the FPR side of IR instructions.
// Currently stateless: there are no data members yet.
class GreedyRegallocFPR {
public:
// Produces the dest/src1/src2 operands to use for the given instruction.
FPRMapping Map(IRInst inst, const IRMeta &meta);
};
// Builds the FPR operand mapping for a single IR instruction.
// Still a stub: both arguments are ignored and the mapping is returned exactly as
// default-constructed.
FPRMapping GreedyRegallocFPR::Map(IRInst inst, const IRMeta &meta) {
	FPRMapping result;
	return result;
}
// Walks a list of IR instructions in order and emits x86 for them by calling the
// emitter methods on code_.
//
// instructions / count: the IR instructions to convert.
// constants: table indexed by inst->src1 for SetConst and SetConstF.
//
// This requires that ThreeOpToTwoOp has been run as the last pass.
//
// NOTE(review): this is unfinished. Many case labels below have no body of their own and
// therefore share the body of the next case that has one, and several groups emit nothing.
// code_ is dereferenced without a null check, so SetCodeBlock() must have been called first.
void IRToX86::ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) {
// Set up regcaches
using namespace Gen;
GreedyRegallocGPR gprAlloc;
GreedyRegallocFPR fprAlloc;
// Loop through all the instructions, emitting code as we go.
// Note that we do need to implement them all - fallbacks are not permitted.
for (int i = 0; i < count; i++) {
const IRInst *inst = &instructions[i];
const IRMeta &meta = *GetIRMeta(inst->op);
// Both mappings are computed for every instruction, whichever register file it uses.
GPRMapping gpr = gprAlloc.Map(*inst, meta);
FPRMapping fpr = fprAlloc.Map(*inst, meta);
// NOTE(review): never read anywhere in this function.
bool symmetric = false;
switch (inst->op) {
case IROp::Nop:
// A Nop reaching this point is treated as a bug.
_assert_(false);
break;
// Output-only
case IROp::SetConst:
code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
break;
case IROp::SetConstF:
// NOTE(review): identical to SetConst - writes gpr.dest, not fpr.dest. Confirm intended.
code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
break;
// Add gets to be special cased because we have LEA.
case IROp::Add:
if (gpr.dest.IsSimpleReg() && gpr.src1.IsSimpleReg() && gpr.src2.IsSimpleReg()) {
code_->LEA(32, gpr.dest.GetSimpleReg(), MRegSum(gpr.src1.GetSimpleReg(), gpr.src2.GetSimpleReg()));
break;
}
// Else fall through.
// 3-op arithmetic that directly corresponds to x86
// (often 2-op in practice if src1 == dst). x86 only does 2-op though so some of these will need splitting.
case IROp::Sub:
case IROp::And:
case IROp::Or:
case IROp::Xor:
// Copy src1 into dest first so the 2-operand x86 form can then apply src2 to dest.
if (inst->src1 != inst->dest) {
code_->MOV(32, gpr.dest, gpr.src1);
}
// Emit directly
// Add is listed here too because it arrives via the fallthrough above.
switch (inst->op) {
case IROp::Add: code_->ADD(32, gpr.dest, gpr.src2); break;
case IROp::Sub: code_->SUB(32, gpr.dest, gpr.src2); break;
case IROp::And: code_->AND(32, gpr.dest, gpr.src2); break;
case IROp::Or: code_->OR(32, gpr.dest, gpr.src2); break;
case IROp::Xor: code_->XOR(32, gpr.dest, gpr.src2); break;
}
break;
// Variable shifts, plus compares, conditional moves and min/max.
// Nothing is emitted for any of these yet.
case IROp::Shl:
case IROp::Shr:
case IROp::Sar:
case IROp::Ror:
case IROp::Slt:
case IROp::SltU:
case IROp::MovZ:
case IROp::MovNZ:
case IROp::Max:
case IROp::Min:
break;
// NOTE(review): every label from here down to Mov shares Mov's body, i.e. a plain copy
// of src1 into dest. The constant / immediate is not applied.
// 2-op arithmetic with constant
case IROp::AddConst:
case IROp::SubConst:
case IROp::AndConst:
case IROp::OrConst:
case IROp::XorConst:
case IROp::SltConst:
case IROp::SltUConst:
// 2-op arithmetic with immediate
case IROp::ShlImm:
case IROp::ShrImm:
case IROp::SarImm:
case IROp::RorImm:
// 2-op arithmetic
case IROp::Mov:
code_->MOV(32, gpr.dest, gpr.src1);
break;
// NOTE(review): all of these share one body, which emits NEG on dest only when
// dest != src1 and emits nothing otherwise. Looks like a placeholder - confirm.
case IROp::Neg:
case IROp::Not:
case IROp::Ext8to32:
case IROp::Ext16to32:
case IROp::ReverseBits:
case IROp::BSwap16:
case IROp::BSwap32:
case IROp::Clz:
if (inst->dest != inst->src1) {
code_->NEG(32, gpr.dest); break;
}
break;
// NOTE(review): every label from here down to Vec4Mov shares Vec4Mov's body, so
// multiplier, memory access and SIMD init ops all emit a MOVAPS for now.
// Multiplier control
case IROp::MtLo:
case IROp::MtHi:
case IROp::MfLo:
case IROp::MfHi:
case IROp::Mult:
case IROp::MultU:
case IROp::Madd:
case IROp::MaddU:
case IROp::Msub:
case IROp::MsubU:
case IROp::Div:
case IROp::DivU:
// Memory access
case IROp::Load8:
case IROp::Load8Ext:
case IROp::Load16:
case IROp::Load16Ext:
case IROp::Load32:
case IROp::LoadFloat:
case IROp::Store8:
case IROp::Store16:
case IROp::Store32:
case IROp::StoreFloat:
case IROp::LoadVec4:
case IROp::StoreVec4:
// Output-only SIMD functions
case IROp::Vec4Init:
case IROp::Vec4Shuffle:
// 2-op SIMD functions
case IROp::Vec4Mov:
code_->MOVAPS(fpr.dest.GetSimpleReg(), fpr.src1);
break;
// Nothing is emitted for these two yet.
case IROp::Vec4Neg:
case IROp::Vec4Abs:
break;
case IROp::Vec4ClampToZero:
// Zero XMM0, take the packed max of it and src1, then store the result to dest.
code_->PXOR(XMM0, R(XMM0));
code_->PMAXSW(XMM0, fpr.src1);
code_->MOVAPD(fpr.dest, XMM0);
break;
// Everything below reaches the default label and emits nothing.
case IROp::Vec4DuplicateUpperBitsAndShift1:
case IROp::Vec2ClampToZero:
// 3-op SIMD functions
case IROp::Vec4Add:
case IROp::Vec4Sub:
case IROp::Vec4Mul:
case IROp::Vec4Div:
case IROp::Vec4Scale:
case IROp::Vec4Dot:
// Pack-unpack
case IROp::Vec2Unpack16To31:
case IROp::Vec2Unpack16To32:
case IROp::Vec4Unpack8To32:
case IROp::Vec2Pack32To16:
case IROp::Vec2Pack31To16:
case IROp::Vec4Pack32To8:
case IROp::Vec4Pack31To8:
case IROp::FCmpVfpuBit:
case IROp::FCmpVfpuAggregate:
case IROp::FCmovVfpuCC:
// Transcendental functions (non-simd)
case IROp::FSin:
case IROp::FCos:
case IROp::FRSqrt:
case IROp::FRecip:
case IROp::FAsin:
// 3-Op FP
case IROp::FAdd:
case IROp::FSub:
case IROp::FMul:
case IROp::FDiv:
case IROp::FMin:
case IROp::FMax:
// 2-Op FP
case IROp::FMov:
case IROp::FAbs:
case IROp::FSqrt:
case IROp::FNeg:
case IROp::FSat0_1:
case IROp::FSatMinus1_1:
case IROp::FSign:
case IROp::FCeil:
case IROp::FFloor:
case IROp::FCmp:
case IROp::FCvtSW:
case IROp::FCvtWS:
case IROp::FRound:
case IROp::FTrunc:
// Cross moves
case IROp::FMovFromGPR:
case IROp::FMovToGPR:
case IROp::FpCondFromReg:
case IROp::FpCondToReg:
case IROp::VfpuCtrlToReg:
// VFPU flag/control
case IROp::SetCtrlVFPU:
case IROp::SetCtrlVFPUReg:
case IROp::SetCtrlVFPUFReg:
// Block Exits
case IROp::ExitToConst:
case IROp::ExitToReg:
case IROp::ExitToConstIfEq:
case IROp::ExitToConstIfNeq:
case IROp::ExitToConstIfGtZ:
case IROp::ExitToConstIfGeZ:
case IROp::ExitToConstIfLtZ:
case IROp::ExitToConstIfLeZ:
case IROp::ExitToPC:
// Utilities
case IROp::Downcount:
case IROp::SetPC:
case IROp::SetPCConst:
case IROp::Syscall:
case IROp::Interpret: // SLOW fallback. Can be made faster.
case IROp::CallReplacement:
case IROp::Break:
default:
break;
}
}
}
} // namespace
#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)

View File

@ -1,23 +0,0 @@
#include "Core/MIPS/IR/IRInst.h"
#include "Common/x64Emitter.h"
namespace MIPSComp {
// Abstract interface for a backend that turns a list of IR instructions into native code.
class IRToNativeInterface {
public:
	// Virtual so implementations can be destroyed through a pointer to this interface.
	virtual ~IRToNativeInterface() = default;

	// Converts `count` instructions starting at `instructions`.
	// `constants` is the constant table that the instructions index into.
	virtual void ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) = 0;
};
// IRToNativeInterface implementation that emits x86 through a Gen::XCodeBlock.
class IRToX86 : public IRToNativeInterface {
public:
	// Sets the code block that ConvertIRToNative() emits into.
	// The pointer is stored as-is; this class never frees it.
	void SetCodeBlock(Gen::XCodeBlock *code) { code_ = code; }

	// Converts `count` IR instructions starting at `instructions`, using `constants`
	// as the constant table. SetCodeBlock() must have been called with a valid block first.
	void ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) override;

private:
	// Starts out null so that a missing SetCodeBlock() call shows up as a null pointer
	// instead of an indeterminate one.
	Gen::XCodeBlock *code_ = nullptr;
};
} // namespace

View File

@ -58,6 +58,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
//regs_.EmitSaveStaticRegisters();
RET();
// Note: needs to not modify EAX, or to save it if it does.
loadStaticRegisters_ = AlignCode16();
//regs_.EmitLoadStaticRegisters();
//MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));

View File

@ -124,6 +124,14 @@ void X64JitBackend::CompIR_System(IRInst inst) {
break;
case IROp::CallReplacement:
FlushAll();
SaveStaticRegisters();
ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
LoadStaticRegisters();
//SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX));
SUB(32, MDisp(CTXREG, downcountOffset), R(EAX));
break;
case IROp::Break:
CompIR_Generic(inst);
break;

View File

@ -242,9 +242,12 @@ void X64JitBackend::FlushAll() {
}
bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
// Used in disassembly viewer.
// Used in disassembly viewer and profiling tools.
// Don't use spaces; profilers get confused or truncate them.
if (ptr == dispatcherPCInSCRATCH1_) {
name = "dispatcher (PC in SCRATCH1)";
name = "dispatcherPCInSCRATCH1";
} else if (ptr == outerLoopPCInSCRATCH1_) {
name = "outerLoopPCInSCRATCH1";
} else if (ptr == dispatcherNoCheck_) {
name = "dispatcherNoCheck";
} else if (ptr == saveStaticRegisters_) {
@ -255,6 +258,8 @@ bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
name = "restoreRoundingMode";
} else if (ptr == applyRoundingMode_) {
name = "applyRoundingMode";
} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
name = "fixedCode";
} else {
return IRNativeBackend::DescribeCodePtr(ptr, name);
}

View File

@ -299,7 +299,6 @@
<ClInclude Include="..\..\Core\MIPS\MIPSTables.h" />
<ClInclude Include="..\..\Core\MIPS\MIPSVFPUFallbacks.h" />
<ClInclude Include="..\..\Core\MIPS\MIPSVFPUUtils.h" />
<ClInclude Include="..\..\Core\MIPS\x86\IRToX86.h" />
<ClInclude Include="..\..\Core\MIPS\x86\Jit.h" />
<ClInclude Include="..\..\Core\MIPS\x86\JitSafeMem.h" />
<ClInclude Include="..\..\Core\MIPS\x86\RegCache.h" />
@ -570,7 +569,6 @@
<ClCompile Include="..\..\Core\MIPS\x86\CompLoadStore.cpp" />
<ClCompile Include="..\..\Core\MIPS\x86\CompReplace.cpp" />
<ClCompile Include="..\..\Core\MIPS\x86\CompVFPU.cpp" />
<ClCompile Include="..\..\Core\MIPS\x86\IRToX86.cpp" />
<ClCompile Include="..\..\Core\MIPS\x86\Jit.cpp" />
<ClCompile Include="..\..\Core\MIPS\x86\JitSafeMem.cpp" />
<ClCompile Include="..\..\Core\MIPS\x86\RegCache.cpp" />

View File

@ -174,9 +174,6 @@
<ClCompile Include="..\..\Core\MIPS\x86\CompVFPU.cpp">
<Filter>MIPS\x86</Filter>
</ClCompile>
<ClCompile Include="..\..\Core\MIPS\x86\IRToX86.cpp">
<Filter>MIPS\x86</Filter>
</ClCompile>
<ClCompile Include="..\..\Core\MIPS\x86\Jit.cpp">
<Filter>MIPS\x86</Filter>
</ClCompile>
@ -1212,9 +1209,6 @@
<ClInclude Include="..\..\Core\MIPS\JitCommon\JitState.h">
<Filter>MIPS\JitCommon</Filter>
</ClInclude>
<ClInclude Include="..\..\Core\MIPS\x86\IRToX86.h">
<Filter>MIPS\x86</Filter>
</ClInclude>
<ClInclude Include="..\..\Core\MIPS\x86\Jit.h">
<Filter>MIPS\x86</Filter>
</ClInclude>