Merge pull request #6999 from unknownbrackets/jit-minor

Clean up jit invalidation with replaced and continued blocks
This commit is contained in:
Henrik Rydgård 2014-10-13 18:31:53 +02:00
commit 54a87800d3
12 changed files with 221 additions and 70 deletions

View File

@ -94,6 +94,7 @@ void Jit::BranchRSRTComp(MIPSOpcode op, ArmGen::CCFlags cc, bool likely)
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -209,6 +210,7 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, ArmGen::CCFlags cc, bool andLink, bool
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -461,6 +463,7 @@ void Jit::Comp_Jump(MIPSOpcode op) {
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -478,6 +481,7 @@ void Jit::Comp_Jump(MIPSOpcode op) {
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -537,6 +541,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
}
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
AddContinuedBlock(gpr.GetImm(rs));
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
// In case the delay slot was a break or something.

View File

@ -1252,8 +1252,6 @@ namespace MIPSComp
gpr.MapReg(rt);
STR(gpr.R(rt), CTXREG, offsetof(MIPSState, vfpuCtrl) + 4 * (imm - 128));
}
//gpr.BindToRegister(rt, true, false);
//MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
// TODO: Optimization if rt is Imm?
// Set these BEFORE disable!

View File

@ -21,6 +21,7 @@
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
@ -67,8 +68,6 @@ ArmJitOptions::ArmJitOptions() {
useBackJump = false;
useForwardJump = false;
cachePointers = true;
// WARNING: These options don't work properly with cache clearing or jit compare.
// Need to find a smart way to handle before enabling.
immBranches = false;
continueBranches = false;
continueJumps = false;
@ -246,6 +245,8 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.lastContinuedPC = 0;
js.initialBlockSize = 0;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
@ -355,10 +356,27 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
// Don't forget to zap the newly written instructions in the instruction cache!
FlushIcache();
b->originalSize = js.numInstructions;
if (js.lastContinuedPC == 0)
b->originalSize = js.numInstructions;
else
{
// We continued at least once. Add the last proxy and set the originalSize correctly.
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
b->originalSize = js.initialBlockSize;
}
return b->normalEntry;
}
// Records that compilation of the current block continues at 'dest'
// (a branch/jump target) instead of ending with an exit.
void Jit::AddContinuedBlock(u32 dest)
{
// The first block is the root block. When we continue, we create proxy blocks after that.
if (js.lastContinuedPC == 0)
// First continuation: remember how many instructions the root block itself had,
// so DoJit() can set originalSize correctly at the end.
js.initialBlockSize = js.numInstructions;
else
// Emit a proxy block covering the segment compiled since the previous
// continuation point, so invalidating that address range also takes
// down this root block (see DestroyBlock's proxyFor handling).
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
js.lastContinuedPC = dest;
}
bool Jit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
// TODO: Not used by anything yet.
@ -417,8 +435,7 @@ bool Jit::ReplaceJalTo(u32 dest) {
// No writing exits, keep going!
// Add a trigger so that if the inlined code changes, we invalidate this block.
// TODO: Correctly determine the size of this block.
blocks.ProxyBlock(js.blockStart, dest, 4, GetCodePtr());
blocks.ProxyBlock(js.blockStart, dest, symbolMap.GetFunctionSize(dest) / sizeof(u32), GetCodePtr());
return true;
}

View File

@ -68,6 +68,8 @@ public:
void CompileDelaySlot(int flags);
void EatInstruction(MIPSOpcode op);
void AddContinuedBlock(u32 dest);
void Comp_RunBlock(MIPSOpcode op);
void Comp_ReplacementFunc(MIPSOpcode op);

View File

@ -119,11 +119,11 @@ void JitBlockCache::Shutdown() {
// This clears the JIT cache. It's called from JitCache.cpp when the JIT cache
// is full and when saving and loading states.
void JitBlockCache::Clear() {
block_map_.clear();
proxyBlockMap_.clear();
for (int i = 0; i < num_blocks_; i++)
DestroyBlock(i, false);
links_to_.clear();
block_map_.clear();
proxyBlockIndices_.clear();
num_blocks_ = 0;
blockMemRanges_[JITBLOCK_RANGE_SCRATCH] = std::make_pair(0xFFFFFFFF, 0x00000000);
@ -149,6 +149,7 @@ int JitBlockCache::AllocateBlock(u32 startAddress) {
int num = GetBlockNumberFromStartAddress(startAddress, false);
if (num >= 0) {
if (blocks_[num].IsPureProxy()) {
RemoveBlockMap(num);
blocks_[num].invalid = true;
b.proxyFor = new std::vector<u32>();
*b.proxyFor = *blocks_[num].proxyFor;
@ -175,7 +176,7 @@ void JitBlockCache::ProxyBlock(u32 rootAddress, u32 startAddress, u32 size, cons
// instead of creating a new block.
int num = GetBlockNumberFromStartAddress(startAddress, false);
if (num != -1) {
INFO_LOG(HLE, "Adding proxy root %08x to block at %08x", rootAddress, startAddress);
DEBUG_LOG(HLE, "Adding proxy root %08x to block at %08x", rootAddress, startAddress);
if (!blocks_[num].proxyFor) {
blocks_[num].proxyFor = new std::vector<u32>();
}
@ -199,10 +200,41 @@ void JitBlockCache::ProxyBlock(u32 rootAddress, u32 startAddress, u32 size, cons
// Make binary searches and stuff work ok
b.normalEntry = codePtr;
b.checkedEntry = codePtr;
proxyBlockIndices_.push_back(num_blocks_);
proxyBlockMap_.insert(std::make_pair(startAddress, num_blocks_));
AddBlockMap(num_blocks_);
num_blocks_++; //commit the current block
}
void JitBlockCache::AddBlockMap(int block_num) {
const JitBlock &b = blocks_[block_num];
// Convert the logical address to a physical address for the block map
// Yeah, this'll work fine for PSP too I think.
u32 pAddr = b.originalAddress & 0x1FFFFFFF;
block_map_[std::make_pair(pAddr + 4 * b.originalSize, pAddr)] = block_num;
}
// Drops block_num's entry from block_map_. Tries the computed key first;
// if that entry is missing or belongs to a different block, scans the map.
void JitBlockCache::RemoveBlockMap(int block_num) {
	const JitBlock &b = blocks_[block_num];
	// Invalid blocks were never mapped (or were already removed.)
	if (b.invalid) {
		return;
	}
	const u32 pAddr = b.originalAddress & 0x1FFFFFFF;
	// Fast path: look up by the (inclusive end, start) key directly.
	const auto found = block_map_.find(std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr));
	if (found != block_map_.end() && found->second == block_num) {
		block_map_.erase(found);
		return;
	}
	// It wasn't in there, or it has the wrong key. Let's search...
	for (auto iter = block_map_.begin(); iter != block_map_.end(); ++iter) {
		if (iter->second == block_num) {
			block_map_.erase(iter);
			return;
		}
	}
}
static void ExpandRange(std::pair<u32, u32> &range, u32 newStart, u32 newEnd) {
range.first = std::min(range.first, newStart);
range.second = std::max(range.second, newEnd);
@ -215,12 +247,9 @@ void JitBlockCache::FinalizeBlock(int block_num, bool block_link) {
MIPSOpcode opcode = GetEmuHackOpForBlock(block_num);
Memory::Write_Opcode_JIT(b.originalAddress, opcode);
// Convert the logical address to a physical address for the block map
// Yeah, this'll work fine for PSP too I think.
u32 pAddr = b.originalAddress & 0x1FFFFFFF;
AddBlockMap(block_num);
u32 latestExit = 0;
block_map_[std::make_pair(pAddr + 4 * b.originalSize - 1, pAddr)] = block_num;
if (block_link) {
for (int i = 0; i < MAX_JIT_BLOCK_EXITS; i++) {
if (b.exitAddress[i] != INVALID_EXIT) {
@ -323,9 +352,10 @@ int JitBlockCache::GetBlockNumberFromStartAddress(u32 addr, bool realBlocksOnly)
int bl = GetBlockNumberFromEmuHackOp(inst);
if (bl < 0) {
if (!realBlocksOnly) {
// Wasn't an emu hack op, look through proxyBlockIndices_.
for (size_t i = 0; i < proxyBlockIndices_.size(); i++) {
int blockIndex = proxyBlockIndices_[i];
// Wasn't an emu hack op, look through proxyBlockMap_.
auto range = proxyBlockMap_.equal_range(addr);
for (auto it = range.first; it != range.second; ++it) {
const int blockIndex = it->second;
if (blocks_[blockIndex].originalAddress == addr && !blocks_[blockIndex].proxyFor && !blocks_[blockIndex].invalid)
return blockIndex;
}
@ -386,6 +416,12 @@ void JitBlockCache::LinkBlockExits(int i) {
#elif defined(_M_IX86) || defined(_M_X64)
XEmitter emit(b.exitPtrs[e]);
emit.JMP(blocks_[destinationBlock].checkedEntry, true);
ptrdiff_t actualSize = emit.GetWritableCodePtr() - b.exitPtrs[e];
int pad = JitBlockCache::GetBlockExitSize() - (int)actualSize;
for (int i = 0; i < pad; ++i) {
emit.INT3();
}
#elif defined(PPC)
PPCXEmitter emit(b.exitPtrs[e]);
emit.B(blocks_[destinationBlock].checkedEntry);
@ -397,9 +433,8 @@ void JitBlockCache::LinkBlockExits(int i) {
}
}
using namespace std;
void JitBlockCache::LinkBlock(int i) {
using namespace std;
LinkBlockExits(i);
JitBlock &b = blocks_[i];
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
@ -415,6 +450,7 @@ void JitBlockCache::LinkBlock(int i) {
}
void JitBlockCache::UnlinkBlock(int i) {
using namespace std;
JitBlock &b = blocks_[i];
pair<multimap<u32, int>::iterator, multimap<u32, int>::iterator> ppp;
ppp = links_to_.equal_range(b.originalAddress);
@ -473,6 +509,8 @@ void JitBlockCache::DestroyBlock(int block_num, bool invalidate) {
return;
}
JitBlock *b = &blocks_[block_num];
// No point it being in there anymore.
RemoveBlockMap(block_num);
// Pure proxy blocks always point directly to a real block, there should be no chains of
// proxy-only blocks pointing to proxy-only blocks.
@ -491,7 +529,14 @@ void JitBlockCache::DestroyBlock(int block_num, bool invalidate) {
delete b->proxyFor;
b->proxyFor = 0;
}
// TODO: Remove from proxyBlockIndices_.
auto range = proxyBlockMap_.equal_range(b->originalAddress);
for (auto it = range.first; it != range.second; ++it) {
if (it->second == block_num) {
// Found it. Delete and bail.
proxyBlockMap_.erase(it);
break;
}
}
// TODO: Handle the case when there's a proxy block and a regular JIT block at the same location.
// In this case we probably "leak" the proxy block currently (no memory leak but it'll stay enabled).
@ -542,17 +587,44 @@ void JitBlockCache::DestroyBlock(int block_num, bool invalidate) {
void JitBlockCache::InvalidateICache(u32 address, const u32 length) {
// Convert the logical address to a physical address for the block map
u32 pAddr = address & 0x1FFFFFFF;
const u32 pAddr = address & 0x1FFFFFFF;
const u32 pEnd = pAddr + length;
// destroy JIT blocks
// !! this works correctly under assumption that any two overlapping blocks end at the same address
// TODO: This may not be a safe assumption with jit continuing enabled.
std::map<pair<u32,u32>, u32>::iterator it1 = block_map_.lower_bound(std::make_pair(pAddr, 0)), it2 = it1;
while (it2 != block_map_.end() && it2->first.second < pAddr + length) {
DestroyBlock(it2->second, true);
it2++;
// Blocks may start and end in overlapping ways, and destroying one invalidates iterators.
// So after destroying one, we start over.
while (true) {
auto next = block_map_.lower_bound(std::make_pair(pAddr, 0));
// End is inclusive, so a matching end won't be included.
auto last = block_map_.lower_bound(std::make_pair(pEnd, 0));
if (next == last) {
// It wasn't in the map at all (or anymore.)
// This includes if both were end(), which should be uncommon.
break;
}
for (; next != last; ++next) {
const u32 blockStart = next->first.second;
const u32 blockEnd = next->first.first;
if (blockStart < pEnd && blockEnd > pAddr) {
DestroyBlock(next->second, true);
// Our iterator is now invalid. Break and search again.
// Most of the time there shouldn't be a bunch of matching blocks.
break;
}
}
if (next == last) {
break;
}
}
if (it1 != it2)
block_map_.erase(it1, it2);
}
// Fixed byte size reserved per block exit stub, so exits can be rewritten
// and INT3-padded in place when blocks are linked (see LinkBlockExits).
int JitBlockCache::GetBlockExitSize() {
#if defined(ARM)
// TODO
return 0;
#elif defined(_M_IX86) || defined(_M_X64)
// x86/x64 exits are 15 bytes: MOV [&pc], imm32 plus JMP rel32
// (matches the "MOV + &pc + dest + JMP + dest" comment in WriteExit).
return 15;
#elif defined(PPC)
// TODO
return 0;
#endif
// NOTE(review): no #else fallback — an architecture matching none of the
// branches above would fall off the end without a return value; confirm
// every supported build target hits one of these cases.
}

View File

@ -141,17 +141,22 @@ public:
int GetNumBlocks() const { return num_blocks_; }
static int GetBlockExitSize();
private:
void LinkBlockExits(int i);
void LinkBlock(int i);
void UnlinkBlock(int i);
void AddBlockMap(int block_num);
void RemoveBlockMap(int block_num);
MIPSOpcode GetEmuHackOpForBlock(int block_num) const;
MIPSState *mips_;
CodeBlock *codeBlock_;
JitBlock *blocks_;
std::vector<int> proxyBlockIndices_;
std::multimap<u32, int> proxyBlockMap_;
int num_blocks_;
std::multimap<u32, int> links_to_;

View File

@ -64,6 +64,8 @@ namespace MIPSComp {
u32 compilerPC;
u32 blockStart;
u32 lastContinuedPC;
u32 initialBlockSize;
int nextExit;
bool cancel;
bool inDelaySlot;

View File

@ -671,7 +671,7 @@ namespace MIPSComp
// In case we have a saved prefix.
//FlushPrefixV();
//gpr.BindToRegister(rt, false, true);
//MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128]));
//MOV(32, gpr.R(rt), M(&mips_->vfpuCtrl[imm - 128]));
} else {
//ERROR - maybe need to make this value too an "interlock" value?
ERROR_LOG(CPU, "mfv - invalid register %i", imm);
@ -688,7 +688,7 @@ namespace MIPSComp
gpr.MapReg(rt);
STW(gpr.R(rt), CTXREG, offsetof(MIPSState, vfpuCtrl) + 4 * (imm - 128));
//gpr.BindToRegister(rt, true, false);
//MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
//MOV(32, M(&mips_->vfpuCtrl[imm - 128]), gpr.R(rt));
// TODO: Optimization if rt is Imm?
// Set these BEFORE disable!

View File

@ -168,9 +168,9 @@ bool Jit::PredictTakeBranch(u32 targetAddr, bool likely) {
void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool delaySlotIsNice, bool likely, bool andLink) {
// We may want to try to continue along this branch a little while, to reduce reg flushing.
if (CanContinueBranch())
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
if (CanContinueBranch(predictTakeBranch ? targetAddr : notTakenAddr))
{
bool predictTakeBranch = PredictTakeBranch(targetAddr, likely);
if (predictTakeBranch)
cc = FlipCCFlag(cc);
@ -215,6 +215,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
if (likely)
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -224,7 +225,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
{
// Take the branch
if (andLink)
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
MOV(32, gpr.GetDefaultLocation(MIPS_REG_RA), Imm32(js.compilerPC + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -259,7 +260,7 @@ void Jit::CompBranchExits(CCFlags cc, u32 targetAddr, u32 notTakenAddr, bool del
// Take the branch
if (andLink)
MOV(32, M(&mips_->r[MIPS_REG_RA]), Imm32(js.compilerPC + 8));
MOV(32, gpr.GetDefaultLocation(MIPS_REG_RA), Imm32(js.compilerPC + 8));
CONDITIONAL_LOG_EXIT(targetAddr);
WriteExit(targetAddr, js.nextExit++);
@ -328,6 +329,7 @@ void Jit::BranchRSRTComp(MIPSOpcode op, Gen::CCFlags cc, bool likely)
// Branch taken. Always compile the delay slot, and then go to dest.
CompileDelaySlot(DELAYSLOT_NICE);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -406,6 +408,7 @@ void Jit::BranchRSZeroComp(MIPSOpcode op, Gen::CCFlags cc, bool andLink, bool li
if (andLink)
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -583,8 +586,9 @@ void Jit::Comp_Jump(MIPSOpcode op) {
switch (op >> 26) {
case 2: //j
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions)
if (CanContinueJump(targetAddr))
{
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -607,8 +611,9 @@ void Jit::Comp_Jump(MIPSOpcode op) {
// Save return address - might be overwritten by delay slot.
gpr.SetImm(MIPS_REG_RA, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_NICE);
if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions)
if (CanContinueJump(targetAddr))
{
AddContinuedBlock(targetAddr);
// Account for the increment in the loop.
js.compilerPC = targetAddr - 4;
// In case the delay slot was a break or something.
@ -650,7 +655,7 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
{
// If this is a syscall, write the pc (for thread switching and other good reasons.)
gpr.MapReg(rs, true, false);
MOV(32, M(&currentMIPS->pc), gpr.R(rs));
MOV(32, M(&mips_->pc), gpr.R(rs));
if (andLink)
gpr.SetImm(rd, js.compilerPC + 8);
CompileDelaySlot(DELAYSLOT_FLUSH);
@ -677,8 +682,9 @@ void Jit::Comp_JumpReg(MIPSOpcode op)
gpr.DiscardRegContentsIfCached(MIPS_REG_T9);
}
if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions)
if (gpr.IsImm(rs) && CanContinueJump(gpr.GetImm(rs)))
{
AddContinuedBlock(gpr.GetImm(rs));
// Account for the increment in the loop.
js.compilerPC = gpr.GetImm(rs) - 4;
// In case the delay slot was a break or something.

View File

@ -1697,7 +1697,7 @@ void Jit::Comp_Mftv(MIPSOpcode op) {
// In case we have a saved prefix.
FlushPrefixV();
gpr.MapReg(rt, false, true);
MOV(32, gpr.R(rt), M(&currentMIPS->vfpuCtrl[imm - 128]));
MOV(32, gpr.R(rt), M(&mips_->vfpuCtrl[imm - 128]));
}
} else {
//ERROR - maybe need to make this value too an "interlock" value?
@ -1724,7 +1724,7 @@ void Jit::Comp_Mftv(MIPSOpcode op) {
}
} else {
gpr.MapReg(rt, true, false);
MOV(32, M(&currentMIPS->vfpuCtrl[imm - 128]), gpr.R(rt));
MOV(32, M(&mips_->vfpuCtrl[imm - 128]), gpr.R(rt));
}
// TODO: Optimization if rt is Imm?
@ -1756,7 +1756,7 @@ void Jit::Comp_Vmfvc(MIPSOpcode op) {
gpr.MapReg(MIPS_REG_VFPUCC, true, false);
MOVD_xmm(fpr.VX(vs), gpr.R(MIPS_REG_VFPUCC));
} else {
MOVSS(fpr.VX(vs), M(&currentMIPS->vfpuCtrl[imm - 128]));
MOVSS(fpr.VX(vs), M(&mips_->vfpuCtrl[imm - 128]));
}
fpr.ReleaseSpillLocks();
}
@ -1772,7 +1772,7 @@ void Jit::Comp_Vmtvc(MIPSOpcode op) {
gpr.MapReg(MIPS_REG_VFPUCC, false, true);
MOVD_xmm(gpr.R(MIPS_REG_VFPUCC), fpr.VX(vs));
} else {
MOVSS(M(&currentMIPS->vfpuCtrl[imm - 128]), fpr.VX(vs));
MOVSS(M(&mips_->vfpuCtrl[imm - 128]), fpr.VX(vs));
}
fpr.ReleaseSpillLocks();

View File

@ -116,8 +116,6 @@ static void JitLogMiss(MIPSOpcode op)
JitOptions::JitOptions()
{
enableBlocklink = true;
// WARNING: These options don't work properly with cache clearing.
// Need to find a smart way to handle before enabling.
immBranches = false;
continueBranches = false;
continueJumps = false;
@ -218,7 +216,7 @@ void Jit::FlushPrefixV()
void Jit::WriteDowncount(int offset)
{
const int downcount = js.downcountAmount + offset;
SUB(32, M(&currentMIPS->downcount), downcount > 127 ? Imm32(downcount) : Imm8(downcount));
SUB(32, M(&mips_->downcount), downcount > 127 ? Imm32(downcount) : Imm8(downcount));
}
void Jit::RestoreRoundingMode(bool force, XEmitter *emitter)
@ -228,10 +226,10 @@ void Jit::RestoreRoundingMode(bool force, XEmitter *emitter)
{
if (emitter == NULL)
emitter = this;
emitter->STMXCSR(M(&currentMIPS->temp));
emitter->STMXCSR(M(&mips_->temp));
// Clear the rounding mode and flush-to-zero bits back to 0.
emitter->AND(32, M(&currentMIPS->temp), Imm32(~(7 << 13)));
emitter->LDMXCSR(M(&currentMIPS->temp));
emitter->AND(32, M(&mips_->temp), Imm32(~(7 << 13)));
emitter->LDMXCSR(M(&mips_->temp));
}
}
@ -252,7 +250,7 @@ void Jit::ApplyRoundingMode(bool force, XEmitter *emitter)
if (!g_Config.bForceFlushToZero)
skip = emitter->J_CC(CC_Z);
emitter->STMXCSR(M(&currentMIPS->temp));
emitter->STMXCSR(M(&mips_->temp));
// The MIPS bits don't correspond exactly, so we have to adjust.
// 0 -> 0 (skip2), 1 -> 3, 2 -> 2 (skip2), 3 -> 1
@ -262,18 +260,18 @@ void Jit::ApplyRoundingMode(bool force, XEmitter *emitter)
emitter->SetJumpTarget(skip2);
emitter->SHL(32, R(EAX), Imm8(13));
emitter->OR(32, M(&currentMIPS->temp), R(EAX));
emitter->OR(32, M(&mips_->temp), R(EAX));
if (g_Config.bForceFlushToZero) {
emitter->OR(32, M(&currentMIPS->temp), Imm32(1 << 15));
emitter->OR(32, M(&mips_->temp), Imm32(1 << 15));
} else {
emitter->TEST(32, M(&mips_->fcr31), Imm32(1 << 24));
FixupBranch skip3 = emitter->J_CC(CC_Z);
emitter->OR(32, M(&currentMIPS->temp), Imm32(1 << 15));
emitter->OR(32, M(&mips_->temp), Imm32(1 << 15));
emitter->SetJumpTarget(skip3);
}
emitter->LDMXCSR(M(&currentMIPS->temp));
emitter->LDMXCSR(M(&mips_->temp));
if (!g_Config.bForceFlushToZero)
emitter->SetJumpTarget(skip);
@ -396,6 +394,8 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
{
js.cancel = false;
js.blockStart = js.compilerPC = mips_->pc;
js.lastContinuedPC = 0;
js.initialBlockSize = 0;
js.nextExit = 0;
js.downcountAmount = 0;
js.curBlock = b;
@ -466,10 +466,27 @@ const u8 *Jit::DoJit(u32 em_address, JitBlock *b)
b->codeSize = (u32)(GetCodePtr() - b->normalEntry);
NOP();
AlignCode4();
b->originalSize = js.numInstructions;
if (js.lastContinuedPC == 0)
b->originalSize = js.numInstructions;
else
{
// We continued at least once. Add the last proxy and set the originalSize correctly.
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
b->originalSize = js.initialBlockSize;
}
return b->normalEntry;
}
// Marks that the block being compiled continues at 'dest' rather than exiting.
// The first continuation records the root block's own size; each later one
// emits a proxy block for the segment compiled since the previous continuation,
// tying that address range's invalidation to this root block.
void Jit::AddContinuedBlock(u32 dest)
{
if (js.lastContinuedPC != 0)
blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (js.compilerPC - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
else
js.initialBlockSize = js.numInstructions;
js.lastContinuedPC = dest;
}
bool Jit::DescribeCodePtr(const u8 *ptr, std::string &name)
{
u32 jitAddr = blocks.GetAddressFromBlockPtr(ptr);
@ -543,7 +560,7 @@ bool Jit::ReplaceJalTo(u32 dest) {
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
RestoreRoundingMode();
ABI_CallFunction(entry->replaceFunc);
SUB(32, M(&currentMIPS->downcount), R(EAX));
SUB(32, M(&mips_->downcount), R(EAX));
ApplyRoundingMode();
}
@ -551,8 +568,7 @@ bool Jit::ReplaceJalTo(u32 dest) {
// No writing exits, keep going!
// Add a trigger so that if the inlined code changes, we invalidate this block.
// TODO: Correctly determine the size of this block.
blocks.ProxyBlock(js.blockStart, dest, 4, GetCodePtr());
blocks.ProxyBlock(js.blockStart, dest, symbolMap.GetFunctionSize(dest) / sizeof(u32), GetCodePtr());
return true;
}
@ -582,7 +598,7 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op)
MIPSCompileOp(Memory::Read_Instruction(js.compilerPC, true));
} else {
FlushAll();
MOV(32, R(ECX), M(&currentMIPS->r[MIPS_REG_RA]));
MOV(32, R(ECX), M(&mips_->r[MIPS_REG_RA]));
js.downcountAmount += cycles;
WriteExitDestInReg(ECX);
js.compiling = false;
@ -601,10 +617,10 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op)
ApplyRoundingMode();
MIPSCompileOp(Memory::Read_Instruction(js.compilerPC, true));
} else {
MOV(32, R(ECX), M(&currentMIPS->r[MIPS_REG_RA]));
SUB(32, M(&currentMIPS->downcount), R(EAX));
MOV(32, R(ECX), M(&mips_->r[MIPS_REG_RA]));
SUB(32, M(&mips_->downcount), R(EAX));
ApplyRoundingMode();
SUB(32, M(&currentMIPS->downcount), Imm8(0));
SUB(32, M(&mips_->downcount), Imm8(0));
WriteExitDestInReg(ECX);
js.compiling = false;
}
@ -677,6 +693,14 @@ void Jit::WriteExit(u32 destination, int exit_num)
// No blocklinking.
MOV(32, M(&mips_->pc), Imm32(destination));
JMP(asm_.dispatcher, true);
// Normally, exits are 15 bytes (MOV + &pc + dest + JMP + dest) on 64 or 32 bit.
// But just in case we somehow optimized, pad.
ptrdiff_t actualSize = GetWritableCodePtr() - b->exitPtrs[exit_num];
int pad = JitBlockCache::GetBlockExitSize() - (int)actualSize;
for (int i = 0; i < pad; ++i) {
INT3();
}
}
}
@ -707,7 +731,7 @@ void Jit::WriteExitDestInReg(X64Reg reg)
FixupBranch tooHigh = J_CC(CC_AE);
// Need to set neg flag again if necessary.
SUB(32, M(&currentMIPS->downcount), Imm32(0));
SUB(32, M(&mips_->downcount), Imm32(0));
JMP(asm_.dispatcher, true);
SetJumpTarget(tooLow);
@ -721,11 +745,11 @@ void Jit::WriteExitDestInReg(X64Reg reg)
if (g_Config.bIgnoreBadMemAccess)
CallProtectedFunction(Core_UpdateState, Imm32(CORE_ERROR));
SUB(32, M(&currentMIPS->downcount), Imm32(0));
SUB(32, M(&mips_->downcount), Imm32(0));
JMP(asm_.dispatcherCheckCoreState, true);
SetJumpTarget(skip);
SUB(32, M(&currentMIPS->downcount), Imm32(0));
SUB(32, M(&mips_->downcount), Imm32(0));
J_CC(CC_NE, asm_.dispatcher, true);
}
else

View File

@ -193,6 +193,7 @@ private:
CompileDelaySlot(flags, &state);
}
void EatInstruction(MIPSOpcode op);
void AddContinuedBlock(u32 dest);
void WriteExit(u32 destination, int exit_num);
void WriteExitDestInReg(X64Reg reg);
@ -259,7 +260,7 @@ private:
}
bool PredictTakeBranch(u32 targetAddr, bool likely);
bool CanContinueBranch() {
bool CanContinueBranch(u32 targetAddr) {
if (!jo.continueBranches || js.numInstructions >= jo.continueMaxInstructions) {
return false;
}
@ -267,6 +268,25 @@ private:
if (js.nextExit >= MAX_JIT_BLOCK_EXITS - 2) {
return false;
}
// Sometimes we predict wrong and get into impossible conditions where games have jumps to 0.
if (!targetAddr) {
return false;
}
return true;
}
// True when we may keep compiling through a jump to targetAddr instead of
// ending the block there.
bool CanContinueJump(u32 targetAddr) {
// A zero target comes from bad predictions / impossible game state - never continue into it.
if (targetAddr == 0) {
return false;
}
// Otherwise, continue only if the option is on and the block isn't already too long.
return jo.continueJumps && js.numInstructions < jo.continueMaxInstructions;
}
// True when an immediate branch may be followed during compilation.
// targetAddr is currently unused here (no zero-target guard, unlike
// CanContinueJump); only the option flag and block length are checked.
bool CanContinueImmBranch(u32 targetAddr) {
return jo.immBranches && js.numInstructions < jo.continueMaxInstructions;
}