Merge pull request #10514 from unknownbrackets/irjit

irjit: Add ini option to precompile functions
This commit is contained in:
Henrik Rydgård 2018-01-08 17:47:54 +01:00 committed by GitHub
commit 62242601ef
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
13 changed files with 257 additions and 47 deletions

View File

@ -406,6 +406,7 @@ static ConfigSetting cpuSettings[] = {
ConfigSetting("FastMemoryAccess", &g_Config.bFastMemory, true, true, true),
ReportedConfigSetting("FuncReplacements", &g_Config.bFuncReplacements, true, true, true),
ConfigSetting("HideSlowWarnings", &g_Config.bHideSlowWarnings, false, true, false),
ConfigSetting("PreloadFunctions", &g_Config.bPreloadFunctions, false, true, true),
ReportedConfigSetting("CPUSpeed", &g_Config.iLockedCPUSpeed, 0, true, true),
ConfigSetting(false),

View File

@ -126,6 +126,7 @@ public:
bool bForceLagSync;
bool bFuncReplacements;
bool bHideSlowWarnings;
bool bPreloadFunctions;
bool bSeparateSASThread;
bool bSeparateIOThread;

View File

@ -712,6 +712,7 @@ void ImportFuncSymbol(const FuncSymbolImport &func, bool reimporting) {
}
WriteSyscall(func.moduleName, func.nid, func.stubAddr);
currentMIPS->InvalidateICache(func.stubAddr, 8);
MIPSAnalyst::PrecompileFunction(func.stubAddr, 8);
return;
}
@ -730,6 +731,7 @@ void ImportFuncSymbol(const FuncSymbolImport &func, bool reimporting) {
}
WriteFuncStub(func.stubAddr, it->symAddr);
currentMIPS->InvalidateICache(func.stubAddr, 8);
MIPSAnalyst::PrecompileFunction(func.stubAddr, 8);
return;
}
}
@ -768,6 +770,7 @@ void ExportFuncSymbol(const FuncSymbolExport &func) {
INFO_LOG(LOADER, "Resolving function %s/%08x", func.moduleName, func.nid);
WriteFuncStub(it->stubAddr, func.symAddr);
currentMIPS->InvalidateICache(it->stubAddr, 8);
MIPSAnalyst::PrecompileFunction(it->stubAddr, 8);
}
}
}
@ -1450,6 +1453,9 @@ static Module *__KernelLoadELFFromPtr(const u8 *ptr, size_t elfSize, u32 loadAdd
// use module_start_func instead of entry_addr if entry_addr is 0
if (module->nm.entry_addr == 0)
module->nm.entry_addr = module->nm.module_start_func;
MIPSAnalyst::PrecompileFunctions();
} else {
module->nm.entry_addr = -1;
}

View File

@ -26,7 +26,7 @@
#include "Common/CommonTypes.h"
#include "Core/HLE/HLE.h"
#include "Core/HLE/HLETables.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPS.h"
#include "Core/CoreTiming.h"
@ -887,6 +887,7 @@ static void __KernelWriteFakeSysCall(u32 nid, u32 *ptr, u32 &pos)
*ptr = pos;
pos += 8;
WriteSyscall("FakeSysCalls", nid, *ptr);
MIPSAnalyst::PrecompileFunction(*ptr, 8);
}
void __KernelThreadingInit()

View File

@ -295,11 +295,12 @@ void IRFrontend::Comp_Jump(MIPSOpcode op) {
// Might be a stubbed address or something?
if (!Memory::IsValidAddress(targetAddr)) {
if (js.nextExit == 0) {
// If preloading, flush - this block will likely be fixed later.
if (js.preloading)
js.cancel = true;
else
ERROR_LOG_REPORT(JIT, "Jump to invalid address: %08x", targetAddr);
} else {
js.compiling = false;
}
js.compiling = false;
// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
return;
}

View File

@ -219,8 +219,9 @@ MIPSOpcode IRFrontend::GetOffsetInstruction(int offset) {
return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
}
void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes) {
void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) {
js.cancel = false;
js.preloading = preload;
js.blockStart = em_address;
js.compilerPC = em_address;
js.lastContinuedPC = 0;
@ -246,6 +247,11 @@ void IRFrontend::DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &m
js.numInstructions++;
}
if (js.cancel) {
// Clear the instructions to signal this was not compiled.
ir.Clear();
}
mipsBytes = js.compilerPC - em_address;
IRWriter simplified;

View File

@ -88,7 +88,7 @@ public:
void DoState(PointerWrap &p);
bool CheckRounding(u32 blockAddress); // returns true if we need a do-over
void DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes);
void DoJit(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload);
void EatPrefix() override {
js.EatPrefix();

View File

@ -16,6 +16,7 @@
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
#include "base/logging.h"
#include "ext/xxhash.h"
#include "profiler/profiler.h"
#include "Common/ChunkFile.h"
#include "Common/StringUtils.h"
@ -66,27 +67,136 @@ void IRJit::InvalidateCacheAt(u32 em_address, int length) {
void IRJit::Compile(u32 em_address) {
PROFILE_THIS_SCOPE("jitc");
int block_num = blocks_.AllocateBlock(em_address);
if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
// Ran out of block numbers - need to reset.
ERROR_LOG(JIT, "Ran out of block numbers, clearing cache");
ClearCache();
block_num = blocks_.AllocateBlock(em_address);
if (g_Config.bPreloadFunctions) {
// Look to see if we've preloaded this block.
int block_num = blocks_.FindPreloadBlock(em_address);
if (block_num != -1) {
IRBlock *b = blocks_.GetBlock(block_num);
// Okay, let's link and finalize the block now.
b->Finalize(block_num);
if (b->IsValid()) {
// Success, we're done.
return;
}
}
}
IRBlock *b = blocks_.GetBlock(block_num);
std::vector<IRInst> instructions;
u32 mipsBytes;
frontend_.DoJit(em_address, instructions, mipsBytes);
b->SetInstructions(instructions);
b->SetOriginalSize(mipsBytes);
// Overwrites the first instruction, and also updates stats.
blocks_.FinalizeBlock(block_num);
if (!CompileBlock(em_address, instructions, mipsBytes, false)) {
// Ran out of block numbers - need to reset.
ERROR_LOG(JIT, "Ran out of block numbers, clearing cache");
ClearCache();
CompileBlock(em_address, instructions, mipsBytes, false);
}
if (frontend_.CheckRounding(em_address)) {
// Our assumptions are all wrong so it's clean-slate time.
ClearCache();
Compile(em_address);
CompileBlock(em_address, instructions, mipsBytes, false);
}
}
// Runs the IR frontend on the code at em_address and stores the result in a newly
// allocated block.
//
// instructions / mipsBytes are outputs filled in by the frontend.
// When preload is true the block is hashed and registered but not linked.
//
// Returns false only when block numbers ran out; the caller decides how to recover.
bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload) {
	frontend_.DoJit(em_address, instructions, mipsBytes, preload);

	// No instructions means nothing was compiled, which is only expected while preloading.
	if (instructions.empty()) {
		_dbg_assert_(JIT, preload);
		// Report success when preloading so the caller doesn't abort.
		return preload;
	}

	const int blockIndex = blocks_.AllocateBlock(em_address);
	const bool outOfBlockNumbers = (blockIndex & ~MIPS_EMUHACK_VALUE_MASK) != 0;
	if (outOfBlockNumbers) {
		// Caller will handle this.
		return false;
	}

	IRBlock *block = blocks_.GetBlock(blockIndex);
	block->SetInstructions(instructions);
	block->SetOriginalSize(mipsBytes);

	if (!preload) {
		// Overwrites the first instruction, and also updates stats.
		// TODO: Should we always hash? Then we can reuse blocks.
		blocks_.FinalizeBlock(blockIndex);
		return true;
	}

	// Preloading: hash, then only update page stats, don't link yet.
	block->UpdateHash();
	blocks_.FinalizeBlock(blockIndex, true);
	return true;
}
// Precompiles every block reachable inside [start_address, start_address + length),
// starting from start_address.  Blocks are compiled in preload mode.
void IRJit::CompileFunction(u32 start_address, u32 length) {
	PROFILE_THIS_SCOPE("jitc");

	// Note: emuhacks are not written yet, so hashes can be validated later.
	// That way, if the game changes the code afterward, it's caught even without icache invalidation.

	const u32 end_address = start_address + length;

	// Branches may go both up and down, so remember every block start handled here.
	std::set<u32> visited;
	std::vector<u32> worklist{ start_address };

	while (!worklist.empty()) {
		const u32 blockAddr = worklist.back();
		worklist.pop_back();

		// To be safe, also check if a real block is there.  This can be a runtime module load.
		const u32 firstOp = Memory::ReadUnchecked_U32(blockAddr);
		const bool alreadyVisited = visited.count(blockAddr) != 0;
		if (MIPS_IS_RUNBLOCK(firstOp) || alreadyVisited) {
			// Already compiled this address.
			continue;
		}

		std::vector<IRInst> blockInsts;
		u32 blockBytes = 0;
		if (!CompileBlock(blockAddr, blockInsts, blockBytes, true)) {
			// Ran out of block numbers - let's hope there's no more code it needs to run.
			// Will flush when actually compiling.
			ERROR_LOG(JIT, "Ran out of block numbers while compiling function");
			return;
		}
		visited.insert(blockAddr);

		for (const IRInst &ir : blockInsts) {
			u32 target;
			switch (ir.op) {
			case IROp::ExitToConst:
			case IROp::ExitToConstIfEq:
			case IROp::ExitToConstIfNeq:
			case IROp::ExitToConstIfGtZ:
			case IROp::ExitToConstIfGeZ:
			case IROp::ExitToConstIfLtZ:
			case IROp::ExitToConstIfLeZ:
			case IROp::ExitToConstIfFpTrue:
			case IROp::ExitToConstIfFpFalse:
				target = ir.constant;
				break;

			default:
				// Includes ExitToPC and Break: nothing to add, the block end is handled below (for jal, etc.)
				target = 0;
				break;
			}

			// Only follow jumps internal to the function.
			const bool insideFunction = target >= start_address && target < end_address;
			if (target != 0 && insideFunction) {
				// Duplicates are fine, they're filtered at the top of the loop.
				worklist.push_back(target);
			}
		}

		// Also include the address after the block, for jal returns.
		const u32 afterBlock = blockAddr + blockBytes;
		if (afterBlock < end_address) {
			worklist.push_back(afterBlock);
		}
	}
}
@ -166,8 +276,10 @@ void IRBlockCache::InvalidateICache(u32 address, u32 length) {
}
}
void IRBlockCache::FinalizeBlock(int i) {
blocks_[i].Finalize(i);
void IRBlockCache::FinalizeBlock(int i, bool preload) {
if (!preload) {
blocks_[i].Finalize(i);
}
u32 startAddr, size;
blocks_[i].GetRange(startAddr, size);
@ -185,6 +297,27 @@ u32 IRBlockCache::AddressToPage(u32 addr) const {
return (addr & 0x3FFFFFFF) >> 10;
}
// Looks for a block that starts exactly at em_address and whose stored hash
// still matches.  Returns its block number, or -1 if there is none.
int IRBlockCache::FindPreloadBlock(u32 em_address) {
	const auto pageIt = byPage_.find(AddressToPage(em_address));
	if (pageIt == byPage_.end())
		return -1;

	for (int blockNum : pageIt->second) {
		u32 blockStart, blockSize;
		blocks_[blockNum].GetRange(blockStart, blockSize);
		if (blockStart != em_address)
			continue;

		if (blocks_[blockNum].HashMatches())
			return blockNum;
	}

	return -1;
}
std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
std::vector<u32> result;
result.resize(blocks_.size());
@ -277,14 +410,18 @@ int IRBlockCache::GetBlockNumberFromStartAddress(u32 em_address, bool realBlocks
return -1;
const std::vector<int> &blocksInPage = iter->second;
int best = -1;
for (int i : blocksInPage) {
uint32_t start, size;
blocks_[i].GetRange(start, size);
if (start == em_address) {
return i;
best = i;
if (blocks_[i].IsValid()) {
return i;
}
}
}
return -1;
return best;
}
bool IRBlock::HasOriginalFirstOp() const {
@ -301,9 +438,13 @@ bool IRBlock::RestoreOriginalFirstOp(int number) {
}
void IRBlock::Finalize(int number) {
origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number);
Memory::Write_Opcode_JIT(origAddr_, opcode);
// Check it wasn't invalidated, in case this is after preload.
// TODO: Allow reusing blocks when the code matches hash_ again, instead.
if (origAddr_) {
origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | number);
Memory::Write_Opcode_JIT(origAddr_, opcode);
}
}
void IRBlock::Destroy(int number) {
@ -317,6 +458,24 @@ void IRBlock::Destroy(int number) {
}
}
// Hashes the instruction words covered by this block, origSize_ bytes starting
// at origAddr_.  Returns 0 when the block has no original address.
u64 IRBlock::CalculateHash() const {
	if (!origAddr_) {
		return 0;
	}

	// This is unfortunate. In case of emuhacks, we have to make a copy.
	// Only whole 32-bit words are copied and hashed, so the buffer size and the
	// hashed length can never disagree.
	const u32 numWords = origSize_ / 4;
	std::vector<u32> buffer;
	buffer.reserve(numWords);
	for (u32 i = 0; i < numWords; ++i) {
		// Let's actually hash the replacement, if any.
		MIPSOpcode instr = Memory::ReadUnchecked_Instruction(origAddr_ + i * 4, false);
		buffer.push_back(instr.encoding);
	}

	// data() is well-defined on an empty vector, unlike &buffer[0].
	return XXH64(buffer.data(), buffer.size() * sizeof(u32), 0x9A5C33B8);
}
bool IRBlock::OverlapsRange(u32 addr, u32 size) const {
addr &= 0x3FFFFFFF;
u32 origAddr = origAddr_ & 0x3FFFFFFF;

View File

@ -46,6 +46,7 @@ public:
origAddr_ = b.origAddr_;
origSize_ = b.origSize_;
origFirstOpcode_ = b.origFirstOpcode_;
hash_ = b.hash_;
b.instr_ = nullptr;
}
@ -66,10 +67,16 @@ public:
MIPSOpcode GetOriginalFirstOp() const { return origFirstOpcode_; }
bool HasOriginalFirstOp() const;
bool RestoreOriginalFirstOp(int number);
bool IsValid() const { return origAddr_ != 0; }
bool IsValid() const { return origAddr_ != 0 && origFirstOpcode_.encoding != 0x68FFFFFF; }
void SetOriginalSize(u32 size) {
origSize_ = size;
}
void UpdateHash() {
hash_ = CalculateHash();
}
bool HashMatches() const {
return origAddr_ && hash_ == CalculateHash();
}
bool OverlapsRange(u32 addr, u32 size) const;
void GetRange(u32 &start, u32 &size) const {
@ -81,11 +88,14 @@ public:
void Destroy(int number);
private:
u64 CalculateHash() const;
IRInst *instr_;
u16 numInstructions_;
u32 origAddr_;
u32 origSize_;
MIPSOpcode origFirstOpcode_;
u64 hash_ = 0;
MIPSOpcode origFirstOpcode_ = MIPSOpcode(0x68FFFFFF);
};
class IRBlockCache : public JitBlockCacheDebugInterface {
@ -93,7 +103,7 @@ public:
IRBlockCache() {}
void Clear();
void InvalidateICache(u32 address, u32 length);
void FinalizeBlock(int i);
void FinalizeBlock(int i, bool preload = false);
int GetNumBlocks() const override { return (int)blocks_.size(); }
int AllocateBlock(int emAddr) {
blocks_.push_back(IRBlock(emAddr));
@ -107,6 +117,8 @@ public:
}
}
int FindPreloadBlock(u32 em_address);
std::vector<u32> SaveAndClearEmuHackOps();
void RestoreSavedEmuHackOps(std::vector<u32> saved);
@ -133,6 +145,7 @@ public:
void RunLoopUntil(u64 globalticks) override;
void Compile(u32 em_address) override; // Compiles a block at current MIPS PC
void CompileFunction(u32 start_address, u32 length) override;
bool DescribeCodePtr(const u8 *ptr, std::string &name) override;
// Not using a regular block cache.
@ -152,6 +165,7 @@ public:
void UnlinkBlock(u8 *checkedEntry, u32 originalAddress) override;
private:
bool CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes, bool preload);
bool ReplaceJalTo(u32 dest);
JitOptions jo;

View File

@ -129,6 +129,7 @@ namespace MIPSComp {
virtual void DoState(PointerWrap &p) = 0;
virtual void RunLoopUntil(u64 globalticks) = 0;
virtual void Compile(u32 em_address) = 0;
virtual void CompileFunction(u32 start_address, u32 length) { }
virtual void ClearCache() = 0;
virtual MIPSOpcode GetOriginalOp(MIPSOpcode op) = 0;

View File

@ -56,15 +56,6 @@ namespace MIPSComp {
AFTER_MEMCHECK_CLEANUP = 0x04,
};
JitState()
: hasSetRounding(0),
lastSetRounding(0),
currentRoundingFunc(nullptr),
startDefaultPrefix(true),
prefixSFlag(PREFIX_UNKNOWN),
prefixTFlag(PREFIX_UNKNOWN),
prefixDFlag(PREFIX_UNKNOWN) {}
u32 compilerPC;
u32 blockStart;
u32 lastContinuedPC;
@ -78,20 +69,21 @@ namespace MIPSComp {
int numInstructions;
bool compiling; // TODO: get rid of this in favor of using analysis results to determine end of block
bool hadBreakpoints;
bool preloading = false;
JitBlock *curBlock;
u8 hasSetRounding;
u8 lastSetRounding;
const u8 *currentRoundingFunc;
u8 hasSetRounding = 0;
u8 lastSetRounding = 0;
const u8 *currentRoundingFunc = nullptr;
// VFPU prefix magic
bool startDefaultPrefix;
bool startDefaultPrefix = true;
u32 prefixS;
u32 prefixT;
u32 prefixD;
PrefixState prefixSFlag;
PrefixState prefixTFlag;
PrefixState prefixDFlag;
PrefixState prefixSFlag = PREFIX_UNKNOWN;
PrefixState prefixTFlag = PREFIX_UNKNOWN;
PrefixState prefixDFlag = PREFIX_UNKNOWN;
void PrefixStart() {
if (startDefaultPrefix) {

View File

@ -21,6 +21,7 @@
#include <unordered_set>
#include <mutex>
#include "base/timeutil.h"
#include "ext/cityhash/city.h"
#include "Common/FileUtil.h"
#include "Core/Config.h"
@ -904,6 +905,32 @@ skip:
}
}
// Asks the active JIT, if there is one, to precompile the given address range.
// Direct calls to this ignore the bPreloadFunctions flag, since it's just for stubs.
void PrecompileFunction(u32 startAddr, u32 length) {
	if (!MIPSComp::jit)
		return;

	MIPSComp::jit->CompileFunction(startAddr, length);
}
void PrecompileFunctions() {
if (!g_Config.bPreloadFunctions) {
return;
}
std::lock_guard<std::recursive_mutex> guard(functions_lock);
// TODO: Load from cache file if available instead.
double st = real_time_now();
for (auto iter = functions.begin(), end = functions.end(); iter != end; iter++) {
const AnalyzedFunction &f = *iter;
PrecompileFunction(f.start, f.end - f.start + 4);
}
double et = real_time_now();
NOTICE_LOG(JIT, "Precompiled %d MIPS functions in %0.2f milliseconds", (int)functions.size(), (et - st) * 1000.0);
}
static const char *DefaultFunctionName(char buffer[256], u32 startAddr) {
sprintf(buffer, "z_un_%08x", startAddr);
return buffer;

View File

@ -107,7 +107,8 @@ namespace MIPSAnalyst
void RegisterFunction(u32 startAddr, u32 size, const char *name);
void ScanForFunctions(u32 startAddr, u32 endAddr, bool insertSymbols);
void ForgetFunctions(u32 startAddr, u32 endAddr);
void CompileLeafs();
void PrecompileFunctions();
void PrecompileFunction(u32 startAddr, u32 length);
void SetHashMapFilename(const std::string& filename = "");
void LoadBuiltinHashMap();