mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-27 07:20:49 +00:00
Merge pull request #17993 from unknownbrackets/x86-jit-minor
x86jit: Replacements, expose for profiling better
This commit is contained in:
commit
dddf63d057
@ -18,6 +18,7 @@
|
||||
#include "Common/Profiler/Profiler.h"
|
||||
#include "Common/StringUtils.h"
|
||||
#include "Common/TimeUtil.h"
|
||||
#include "Core/Debugger/SymbolMap.h"
|
||||
#include "Core/MIPS/MIPSTables.h"
|
||||
#include "Core/MIPS/IR/IRNativeCommon.h"
|
||||
|
||||
@ -451,8 +452,9 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
|
||||
break;
|
||||
}
|
||||
|
||||
// Used by profiling tools that don't like spaces.
|
||||
if (block_num == -1) {
|
||||
name = "(unknown or deleted block)";
|
||||
name = "unknownOrDeletedBlock";
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -460,7 +462,13 @@ bool IRNativeJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
|
||||
if (block) {
|
||||
u32 start = 0, size = 0;
|
||||
block->GetRange(start, size);
|
||||
name = StringFromFormat("(block %d at %08x)", block_num, start);
|
||||
|
||||
// It helps to know which func this block is inside.
|
||||
const std::string label = g_symbolMap ? g_symbolMap->GetDescription(start) : "";
|
||||
if (!label.empty())
|
||||
name = StringFromFormat("block%d_%08x_%s", block_num, start, label.c_str());
|
||||
else
|
||||
name = StringFromFormat("block%d_%08x", block_num, start);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -236,7 +236,7 @@ void MIPSState::UpdateCore(CPUCore desired) {
|
||||
switch (PSP_CoreParameter().cpuCore) {
|
||||
case CPUCore::JIT:
|
||||
case CPUCore::JIT_IR:
|
||||
INFO_LOG(CPU, "Switching to JIT");
|
||||
INFO_LOG(CPU, "Switching to JIT%s", PSP_CoreParameter().cpuCore == CPUCore::JIT_IR ? " IR" : "");
|
||||
if (oldjit) {
|
||||
std::lock_guard<std::recursive_mutex> guard(MIPSComp::jitLock);
|
||||
MIPSComp::jit = nullptr;
|
||||
@ -246,7 +246,7 @@ void MIPSState::UpdateCore(CPUCore desired) {
|
||||
break;
|
||||
|
||||
case CPUCore::IR_INTERPRETER:
|
||||
INFO_LOG(CPU, "Switching to IRJIT");
|
||||
INFO_LOG(CPU, "Switching to IR interpreter");
|
||||
if (oldjit) {
|
||||
std::lock_guard<std::recursive_mutex> guard(MIPSComp::jitLock);
|
||||
MIPSComp::jit = nullptr;
|
||||
|
@ -252,8 +252,11 @@ void RiscVJitBackend::FlushAll() {
|
||||
|
||||
bool RiscVJitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
|
||||
// Used in disassembly viewer.
|
||||
// Don't use spaces; profilers get confused or truncate them.
|
||||
if (ptr == dispatcherPCInSCRATCH1_) {
|
||||
name = "dispatcher (PC in SCRATCH1)";
|
||||
name = "dispatcherPCInSCRATCH1";
|
||||
} else if (ptr == outerLoopPCInSCRATCH1_) {
|
||||
name = "outerLoopPCInSCRATCH1";
|
||||
} else if (ptr == dispatcherNoCheck_) {
|
||||
name = "dispatcherNoCheck";
|
||||
} else if (ptr == saveStaticRegisters_) {
|
||||
@ -262,6 +265,8 @@ bool RiscVJitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
|
||||
name = "loadStaticRegisters";
|
||||
} else if (ptr == applyRoundingMode_) {
|
||||
name = "applyRoundingMode";
|
||||
} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
|
||||
name = "fixedCode";
|
||||
} else {
|
||||
return IRNativeBackend::DescribeCodePtr(ptr, name);
|
||||
}
|
||||
|
@ -1,299 +0,0 @@
|
||||
#include "ppsspp_config.h"
|
||||
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
||||
|
||||
#include "Core/MIPS/x86/IRToX86.h"
|
||||
|
||||
namespace MIPSComp {
|
||||
|
||||
// Initial attempt at converting IR directly to x86.
|
||||
// This is intended to be an easy way to benefit from the IR with the current infrastructure.
|
||||
// Later tries may go across multiple blocks and a different representation.
|
||||
|
||||
struct GPRMapping {
|
||||
Gen::OpArg dest;
|
||||
Gen::OpArg src1;
|
||||
Gen::OpArg src2;
|
||||
};
|
||||
|
||||
struct FPRMapping {
|
||||
Gen::OpArg dest;
|
||||
Gen::OpArg src1;
|
||||
Gen::OpArg src2;
|
||||
};
|
||||
|
||||
|
||||
class GreedyRegallocGPR {
|
||||
public:
|
||||
GPRMapping Map(IRInst inst, const IRMeta &meta);
|
||||
|
||||
private:
|
||||
|
||||
};
|
||||
|
||||
|
||||
GPRMapping GreedyRegallocGPR::Map(IRInst inst, const IRMeta &meta) {
|
||||
GPRMapping mapping;
|
||||
if (meta.types[0] == 'G') {
|
||||
|
||||
}
|
||||
// etc..
|
||||
return mapping;
|
||||
}
|
||||
|
||||
// Every 4 registers can also be mapped into an SSE register.
|
||||
// When changing from single to vec4 mapping, we'll just flush, for now.
|
||||
class GreedyRegallocFPR {
|
||||
public:
|
||||
FPRMapping Map(IRInst inst, const IRMeta &meta);
|
||||
};
|
||||
|
||||
FPRMapping GreedyRegallocFPR::Map(IRInst inst, const IRMeta &meta) {
|
||||
FPRMapping mapping;
|
||||
|
||||
return mapping;
|
||||
}
|
||||
|
||||
|
||||
// This requires that ThreeOpToTwoOp has been run as the last pass.
|
||||
void IRToX86::ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) {
|
||||
// Set up regcaches
|
||||
using namespace Gen;
|
||||
|
||||
GreedyRegallocGPR gprAlloc;
|
||||
GreedyRegallocFPR fprAlloc;
|
||||
|
||||
// Loop through all the instructions, emitting code as we go.
|
||||
// Note that we do need to implement them all - fallbacks are not permitted.
|
||||
for (int i = 0; i < count; i++) {
|
||||
const IRInst *inst = &instructions[i];
|
||||
const IRMeta &meta = *GetIRMeta(inst->op);
|
||||
GPRMapping gpr = gprAlloc.Map(*inst, meta);
|
||||
FPRMapping fpr = fprAlloc.Map(*inst, meta);
|
||||
|
||||
bool symmetric = false;
|
||||
switch (inst->op) {
|
||||
case IROp::Nop:
|
||||
_assert_(false);
|
||||
break;
|
||||
|
||||
// Output-only
|
||||
case IROp::SetConst:
|
||||
code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
|
||||
break;
|
||||
case IROp::SetConstF:
|
||||
code_->MOV(32, gpr.dest, Imm32(constants[inst->src1]));
|
||||
break;
|
||||
|
||||
// Add gets to be special cased because we have LEA.
|
||||
case IROp::Add:
|
||||
if (gpr.dest.IsSimpleReg() && gpr.src1.IsSimpleReg() && gpr.src2.IsSimpleReg()) {
|
||||
code_->LEA(32, gpr.dest.GetSimpleReg(), MRegSum(gpr.src1.GetSimpleReg(), gpr.src2.GetSimpleReg()));
|
||||
break;
|
||||
}
|
||||
// Else fall through.
|
||||
// 3-op arithmetic that directly corresponds to x86
|
||||
// (often 2-op in practice if src1 == dst). x86 only does 2-op though so some of these will need splitting.
|
||||
case IROp::Sub:
|
||||
case IROp::And:
|
||||
case IROp::Or:
|
||||
case IROp::Xor:
|
||||
if (inst->src1 != inst->dest) {
|
||||
code_->MOV(32, gpr.dest, gpr.src1);
|
||||
}
|
||||
// Emit directly
|
||||
switch (inst->op) {
|
||||
case IROp::Add: code_->ADD(32, gpr.dest, gpr.src2); break;
|
||||
case IROp::Sub: code_->SUB(32, gpr.dest, gpr.src2); break;
|
||||
case IROp::And: code_->AND(32, gpr.dest, gpr.src2); break;
|
||||
case IROp::Or: code_->OR(32, gpr.dest, gpr.src2); break;
|
||||
case IROp::Xor: code_->XOR(32, gpr.dest, gpr.src2); break;
|
||||
}
|
||||
break;
|
||||
|
||||
// Variable shifts.
|
||||
case IROp::Shl:
|
||||
case IROp::Shr:
|
||||
case IROp::Sar:
|
||||
case IROp::Ror:
|
||||
|
||||
case IROp::Slt:
|
||||
case IROp::SltU:
|
||||
case IROp::MovZ:
|
||||
case IROp::MovNZ:
|
||||
case IROp::Max:
|
||||
case IROp::Min:
|
||||
break;
|
||||
|
||||
// 2-op arithmetic with constant
|
||||
case IROp::AddConst:
|
||||
case IROp::SubConst:
|
||||
case IROp::AndConst:
|
||||
case IROp::OrConst:
|
||||
case IROp::XorConst:
|
||||
case IROp::SltConst:
|
||||
case IROp::SltUConst:
|
||||
|
||||
// 2-op arithmetic with immediate
|
||||
case IROp::ShlImm:
|
||||
case IROp::ShrImm:
|
||||
case IROp::SarImm:
|
||||
case IROp::RorImm:
|
||||
|
||||
// 2-op arithmetic
|
||||
case IROp::Mov:
|
||||
code_->MOV(32, gpr.dest, gpr.src1);
|
||||
break;
|
||||
|
||||
case IROp::Neg:
|
||||
case IROp::Not:
|
||||
case IROp::Ext8to32:
|
||||
case IROp::Ext16to32:
|
||||
case IROp::ReverseBits:
|
||||
case IROp::BSwap16:
|
||||
case IROp::BSwap32:
|
||||
case IROp::Clz:
|
||||
if (inst->dest != inst->src1) {
|
||||
code_->NEG(32, gpr.dest); break;
|
||||
}
|
||||
break;
|
||||
// Multiplier control
|
||||
case IROp::MtLo:
|
||||
case IROp::MtHi:
|
||||
case IROp::MfLo:
|
||||
case IROp::MfHi:
|
||||
case IROp::Mult:
|
||||
case IROp::MultU:
|
||||
case IROp::Madd:
|
||||
case IROp::MaddU:
|
||||
case IROp::Msub:
|
||||
case IROp::MsubU:
|
||||
case IROp::Div:
|
||||
case IROp::DivU:
|
||||
|
||||
// Memory access
|
||||
case IROp::Load8:
|
||||
case IROp::Load8Ext:
|
||||
case IROp::Load16:
|
||||
case IROp::Load16Ext:
|
||||
case IROp::Load32:
|
||||
case IROp::LoadFloat:
|
||||
case IROp::Store8:
|
||||
case IROp::Store16:
|
||||
case IROp::Store32:
|
||||
case IROp::StoreFloat:
|
||||
case IROp::LoadVec4:
|
||||
case IROp::StoreVec4:
|
||||
|
||||
// Output-only SIMD functions
|
||||
case IROp::Vec4Init:
|
||||
case IROp::Vec4Shuffle:
|
||||
|
||||
// 2-op SIMD functions
|
||||
case IROp::Vec4Mov:
|
||||
code_->MOVAPS(fpr.dest.GetSimpleReg(), fpr.src1);
|
||||
break;
|
||||
case IROp::Vec4Neg:
|
||||
case IROp::Vec4Abs:
|
||||
break;
|
||||
case IROp::Vec4ClampToZero:
|
||||
code_->PXOR(XMM0, R(XMM0));
|
||||
code_->PMAXSW(XMM0, fpr.src1);
|
||||
code_->MOVAPD(fpr.dest, XMM0);
|
||||
break;
|
||||
case IROp::Vec4DuplicateUpperBitsAndShift1:
|
||||
case IROp::Vec2ClampToZero:
|
||||
|
||||
// 3-op SIMD functions
|
||||
case IROp::Vec4Add:
|
||||
case IROp::Vec4Sub:
|
||||
case IROp::Vec4Mul:
|
||||
case IROp::Vec4Div:
|
||||
|
||||
case IROp::Vec4Scale:
|
||||
case IROp::Vec4Dot:
|
||||
|
||||
// Pack-unpack
|
||||
case IROp::Vec2Unpack16To31:
|
||||
case IROp::Vec2Unpack16To32:
|
||||
case IROp::Vec4Unpack8To32:
|
||||
case IROp::Vec2Pack32To16:
|
||||
case IROp::Vec2Pack31To16:
|
||||
case IROp::Vec4Pack32To8:
|
||||
case IROp::Vec4Pack31To8:
|
||||
|
||||
case IROp::FCmpVfpuBit:
|
||||
case IROp::FCmpVfpuAggregate:
|
||||
case IROp::FCmovVfpuCC:
|
||||
|
||||
// Trancendental functions (non-simd)
|
||||
case IROp::FSin:
|
||||
case IROp::FCos:
|
||||
case IROp::FRSqrt:
|
||||
case IROp::FRecip:
|
||||
case IROp::FAsin:
|
||||
|
||||
// 3-Op FP
|
||||
case IROp::FAdd:
|
||||
case IROp::FSub:
|
||||
case IROp::FMul:
|
||||
case IROp::FDiv:
|
||||
case IROp::FMin:
|
||||
case IROp::FMax:
|
||||
|
||||
// 2-Op FP
|
||||
case IROp::FMov:
|
||||
case IROp::FAbs:
|
||||
case IROp::FSqrt:
|
||||
case IROp::FNeg:
|
||||
case IROp::FSat0_1:
|
||||
case IROp::FSatMinus1_1:
|
||||
case IROp::FSign:
|
||||
case IROp::FCeil:
|
||||
case IROp::FFloor:
|
||||
case IROp::FCmp:
|
||||
case IROp::FCvtSW:
|
||||
case IROp::FCvtWS:
|
||||
case IROp::FRound:
|
||||
case IROp::FTrunc:
|
||||
|
||||
// Cross moves
|
||||
case IROp::FMovFromGPR:
|
||||
case IROp::FMovToGPR:
|
||||
case IROp::FpCondFromReg:
|
||||
case IROp::FpCondToReg:
|
||||
case IROp::VfpuCtrlToReg:
|
||||
|
||||
// VFPU flag/control
|
||||
case IROp::SetCtrlVFPU:
|
||||
case IROp::SetCtrlVFPUReg:
|
||||
case IROp::SetCtrlVFPUFReg:
|
||||
|
||||
// Block Exits
|
||||
case IROp::ExitToConst:
|
||||
case IROp::ExitToReg:
|
||||
case IROp::ExitToConstIfEq:
|
||||
case IROp::ExitToConstIfNeq:
|
||||
case IROp::ExitToConstIfGtZ:
|
||||
case IROp::ExitToConstIfGeZ:
|
||||
case IROp::ExitToConstIfLtZ:
|
||||
case IROp::ExitToConstIfLeZ:
|
||||
case IROp::ExitToPC:
|
||||
|
||||
// Utilities
|
||||
case IROp::Downcount:
|
||||
case IROp::SetPC:
|
||||
case IROp::SetPCConst:
|
||||
case IROp::Syscall:
|
||||
case IROp::Interpret: // SLOW fallback. Can be made faster.
|
||||
case IROp::CallReplacement:
|
||||
case IROp::Break:
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
|
@ -1,23 +0,0 @@
|
||||
#include "Core/MIPS/IR/IRInst.h"
|
||||
#include "Common/x64Emitter.h"
|
||||
|
||||
namespace MIPSComp {
|
||||
|
||||
class IRToNativeInterface {
|
||||
public:
|
||||
virtual ~IRToNativeInterface() {}
|
||||
|
||||
virtual void ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) = 0;
|
||||
};
|
||||
|
||||
|
||||
class IRToX86 : public IRToNativeInterface {
|
||||
public:
|
||||
void SetCodeBlock(Gen::XCodeBlock *code) { code_ = code; }
|
||||
void ConvertIRToNative(const IRInst *instructions, int count, const u32 *constants) override;
|
||||
|
||||
private:
|
||||
Gen::XCodeBlock *code_;
|
||||
};
|
||||
|
||||
} // namespace
|
@ -58,6 +58,7 @@ void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {
|
||||
//regs_.EmitSaveStaticRegisters();
|
||||
RET();
|
||||
|
||||
// Note: needs to not modify EAX, or to save it if it does.
|
||||
loadStaticRegisters_ = AlignCode16();
|
||||
//regs_.EmitLoadStaticRegisters();
|
||||
//MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
|
||||
|
@ -124,6 +124,14 @@ void X64JitBackend::CompIR_System(IRInst inst) {
|
||||
break;
|
||||
|
||||
case IROp::CallReplacement:
|
||||
FlushAll();
|
||||
SaveStaticRegisters();
|
||||
ABI_CallFunction(GetReplacementFunc(inst.constant)->replaceFunc);
|
||||
LoadStaticRegisters();
|
||||
//SUB(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG), R(EAX));
|
||||
SUB(32, MDisp(CTXREG, downcountOffset), R(EAX));
|
||||
break;
|
||||
|
||||
case IROp::Break:
|
||||
CompIR_Generic(inst);
|
||||
break;
|
||||
|
@ -242,9 +242,12 @@ void X64JitBackend::FlushAll() {
|
||||
}
|
||||
|
||||
bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
|
||||
// Used in disassembly viewer.
|
||||
// Used in disassembly viewer and profiling tools.
|
||||
// Don't use spaces; profilers get confused or truncate them.
|
||||
if (ptr == dispatcherPCInSCRATCH1_) {
|
||||
name = "dispatcher (PC in SCRATCH1)";
|
||||
name = "dispatcherPCInSCRATCH1";
|
||||
} else if (ptr == outerLoopPCInSCRATCH1_) {
|
||||
name = "outerLoopPCInSCRATCH1";
|
||||
} else if (ptr == dispatcherNoCheck_) {
|
||||
name = "dispatcherNoCheck";
|
||||
} else if (ptr == saveStaticRegisters_) {
|
||||
@ -255,6 +258,8 @@ bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
|
||||
name = "restoreRoundingMode";
|
||||
} else if (ptr == applyRoundingMode_) {
|
||||
name = "applyRoundingMode";
|
||||
} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
|
||||
name = "fixedCode";
|
||||
} else {
|
||||
return IRNativeBackend::DescribeCodePtr(ptr, name);
|
||||
}
|
||||
|
@ -299,7 +299,6 @@
|
||||
<ClInclude Include="..\..\Core\MIPS\MIPSTables.h" />
|
||||
<ClInclude Include="..\..\Core\MIPS\MIPSVFPUFallbacks.h" />
|
||||
<ClInclude Include="..\..\Core\MIPS\MIPSVFPUUtils.h" />
|
||||
<ClInclude Include="..\..\Core\MIPS\x86\IRToX86.h" />
|
||||
<ClInclude Include="..\..\Core\MIPS\x86\Jit.h" />
|
||||
<ClInclude Include="..\..\Core\MIPS\x86\JitSafeMem.h" />
|
||||
<ClInclude Include="..\..\Core\MIPS\x86\RegCache.h" />
|
||||
@ -570,7 +569,6 @@
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\CompLoadStore.cpp" />
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\CompReplace.cpp" />
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\CompVFPU.cpp" />
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\IRToX86.cpp" />
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\Jit.cpp" />
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\JitSafeMem.cpp" />
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\RegCache.cpp" />
|
||||
|
@ -174,9 +174,6 @@
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\CompVFPU.cpp">
|
||||
<Filter>MIPS\x86</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\IRToX86.cpp">
|
||||
<Filter>MIPS\x86</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="..\..\Core\MIPS\x86\Jit.cpp">
|
||||
<Filter>MIPS\x86</Filter>
|
||||
</ClCompile>
|
||||
@ -1212,9 +1209,6 @@
|
||||
<ClInclude Include="..\..\Core\MIPS\JitCommon\JitState.h">
|
||||
<Filter>MIPS\JitCommon</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\Core\MIPS\x86\IRToX86.h">
|
||||
<Filter>MIPS\x86</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="..\..\Core\MIPS\x86\Jit.h">
|
||||
<Filter>MIPS\x86</Filter>
|
||||
</ClInclude>
|
||||
|
Loading…
Reference in New Issue
Block a user