mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-12-11 15:44:15 +00:00
4113fd940c
Otherwise they just crash, and crash often. Special thanks to Jaaan for numerous trials to try to find the best way to solve the crashes.
656 lines
20 KiB
C++
656 lines
20 KiB
C++
// Copyright (c) 2012- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#include "base/logging.h"
|
|
#include "profiler/profiler.h"
|
|
#include "Common/ChunkFile.h"
|
|
#include "Common/CPUDetect.h"
|
|
#include "Common/StringUtils.h"
|
|
|
|
#include "Core/Reporting.h"
|
|
#include "Core/Config.h"
|
|
#include "Core/Core.h"
|
|
#include "Core/CoreTiming.h"
|
|
#include "Core/Debugger/SymbolMap.h"
|
|
#include "Core/MemMap.h"
|
|
|
|
#include "Core/MIPS/MIPS.h"
|
|
#include "Core/MIPS/MIPSCodeUtils.h"
|
|
#include "Core/MIPS/MIPSInt.h"
|
|
#include "Core/MIPS/MIPSTables.h"
|
|
#include "Core/HLE/ReplaceTables.h"
|
|
#include "Core/MIPS/ARM64/Arm64RegCache.h"
|
|
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
|
|
|
|
#include "Core/MIPS/ARM64/Arm64Jit.h"
|
|
#include "Core/MIPS/JitCommon/JitCommon.h"
|
|
|
|
using namespace Arm64JitConstants;
|
|
|
|
void DisassembleArm64Print(const u8 *data, int size) {
|
|
std::vector<std::string> lines = DisassembleArm64(data, size);
|
|
for (auto s : lines) {
|
|
ILOG("%s", s.c_str());
|
|
}
|
|
/*
|
|
ILOG("+++");
|
|
// A format friendly to Online Disassembler which gets endianness wrong
|
|
for (size_t i = 0; i < lines.size(); i++) {
|
|
uint32_t opcode = ((const uint32_t *)data)[i];
|
|
ILOG("%d/%d: %08x", (int)(i+1), (int)lines.size(), swap32(opcode));
|
|
}
|
|
ILOG("===");
|
|
ILOG("===");*/
|
|
}
|
|
|
|
namespace MIPSComp
|
|
{
|
|
using namespace Arm64Gen;
|
|
using namespace Arm64JitConstants;
|
|
|
|
// Builds the jit for the given MIPS state: wires the register caches to this
// emitter, reserves the code space, generates the fixed code and seeds the
// prefix / rounding bookkeeping in `js`.
Arm64Jit::Arm64Jit(MIPSState *mips)
	: blocks(mips, this), gpr(mips, &js, &jo), fpr(mips, &js, &jo), mips_(mips), fp(this) {
	// Block logging is off until someone bumps these counters.
	logBlocks = 0;
	dontLogBlocks = 0;

	blocks.Init();

	// Both register caches emit through this object (the FPU cache also gets `fp`).
	gpr.SetEmitter(this);
	fpr.SetEmitter(this, &fp);

	// 16MB of code space. The original note says 32MB is the absolute max because
	// that's what an ARM branch instruction can reach, backwards and forwards.
	// NOTE(review): that figure looks inherited from the 32-bit ARM jit - confirm for ARM64.
	AllocCodeSpace(1024 * 1024 * 16);
	GenerateFixedCode(jo);

	js.startDefaultPrefix = mips_->HasDefaultPrefix();
	// Start out with the conversion routine for rounding mode 0.
	js.currentRoundingFunc = convertS0ToSCRATCH1[0];
}
|
|
|
|
// Intentionally empty: no explicit teardown is performed here.
Arm64Jit::~Arm64Jit() {}
|
|
|
|
// Savestate hook for the "Jit" section (versions 1 through 2).
// Version 1 only carries startDefaultPrefix; version 2 adds hasSetRounding.
void Arm64Jit::DoState(PointerWrap &p) {
	auto section = p.Section("Jit", 1, 2);
	if (!section)
		return;

	p.Do(js.startDefaultPrefix);

	if (section >= 2) {
		p.Do(js.hasSetRounding);
		js.lastSetRounding = 0;
	} else {
		// Older states don't carry the flag, so assume rounding has been set.
		js.hasSetRounding = 1;
	}

	// After loading, pick the conversion routine matching the low two bits of fcr31.
	const bool loading = p.GetMode() == PointerWrap::MODE_READ;
	if (loading) {
		js.currentRoundingFunc = convertS0ToSCRATCH1[(mips_->fcr31) & 3];
	}
}
|
|
|
|
// This is here so the savestate matches between jit and non-jit.
|
|
void Arm64Jit::DoDummyState(PointerWrap &p) {
|
|
auto s = p.Section("Jit", 1, 2);
|
|
if (!s)
|
|
return;
|
|
|
|
bool dummy = false;
|
|
p.Do(dummy);
|
|
if (s >= 2) {
|
|
dummy = true;
|
|
p.Do(dummy);
|
|
}
|
|
}
|
|
|
|
void Arm64Jit::FlushAll()
|
|
{
|
|
gpr.FlushAll();
|
|
fpr.FlushAll();
|
|
FlushPrefixV();
|
|
}
|
|
|
|
void Arm64Jit::FlushPrefixV() {
|
|
if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
|
|
gpr.SetRegImm(SCRATCH1, js.prefixS);
|
|
STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
|
|
js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
|
|
}
|
|
|
|
if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
|
|
gpr.SetRegImm(SCRATCH1, js.prefixT);
|
|
STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
|
|
js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
|
|
}
|
|
|
|
if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
|
|
gpr.SetRegImm(SCRATCH1, js.prefixD);
|
|
STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
|
|
js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
|
|
}
|
|
}
|
|
|
|
// Drops every compiled block, resets the code space and regenerates the
// fixed code so the jit can start over.
void Arm64Jit::ClearCache() {
	ILOG("ARM64Jit: Clearing the cache!");

	blocks.Clear();
	ClearCodeSpace();
	// The fixed code was wiped along with the code space; emit it again.
	GenerateFixedCode(jo);
}
|
|
|
|
// Clears the block cache only; unlike ClearCache() the code space is left alone.
void Arm64Jit::InvalidateCache() {
	blocks.Clear();
}
|
|
|
|
// Forwards an invalidation request for `length` bytes at emulated address
// `em_address` to the block cache.
void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) {
	blocks.InvalidateICache(em_address, length);
}
|
|
|
|
// Accounts for `op` as if it had been compiled (instruction count, compiler PC,
// cycle estimate) without emitting anything for it. Reports once if the eaten
// op is flagged DELAYSLOT or if we are currently inside a delay slot.
void Arm64Jit::EatInstruction(MIPSOpcode op) {
	const MIPSInfo opInfo = MIPSGetInfo(op);
	if (opInfo & DELAYSLOT) {
		ERROR_LOG_REPORT_ONCE(ateDelaySlot, JIT, "Ate a branch op.");
	}
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT_ONCE(ateInDelaySlot, JIT, "Ate an instruction inside a delay slot.");
	}

	// Advance past the instruction and charge its estimated cycles.
	++js.numInstructions;
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}
|
|
|
|
void Arm64Jit::CompileDelaySlot(int flags) {
|
|
// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
|
|
// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
|
|
// delay slot, we're screwed.
|
|
if (flags & DELAYSLOT_SAFE)
|
|
MRS(FLAGTEMPREG, FIELD_NZCV); // Save flags register. FLAGTEMPREG is preserved through function calls and is not allocated.
|
|
|
|
js.inDelaySlot = true;
|
|
MIPSOpcode op = GetOffsetInstruction(1);
|
|
MIPSCompileOp(op, this);
|
|
js.inDelaySlot = false;
|
|
|
|
if (flags & DELAYSLOT_FLUSH)
|
|
FlushAll();
|
|
if (flags & DELAYSLOT_SAFE)
|
|
_MSR(FIELD_NZCV, FLAGTEMPREG); // Restore flags register
|
|
}
|
|
|
|
|
|
// Compiles the block starting at emulated address `em_address`.
//
// Clears the whole cache first if code space is nearly exhausted or the block
// cache is full. After compiling, if an assumption made during compilation
// turned out wrong (rounding mode usage was detected), the cache is cleared
// and the block is compiled again.
void Arm64Jit::Compile(u32 em_address) {
	PROFILE_THIS_SCOPE("jitc");
	if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
		// Cast explicitly: "%i" expects an int, and the return type of GetSpaceLeft()
		// is not guaranteed to be one (presumably size_t - confirm in the emitter header).
		INFO_LOG(JIT, "Space left: %i", (int)GetSpaceLeft());
		ClearCache();
	}

	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	DoJit(em_address, b);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink);

	bool cleanSlate = false;

	if (js.hasSetRounding && !js.lastSetRounding) {
		WARN_LOG(JIT, "Detected rounding mode usage, rebuilding jit with checks");
		// Won't loop, since hasSetRounding is only ever set to 1.
		js.lastSetRounding = js.hasSetRounding;
		cleanSlate = true;
	}

	// Drat.  The VFPU hit an uneaten prefix at the end of a block.
	if (js.startDefaultPrefix && js.MayHavePrefix()) {
		WARN_LOG(JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4);
		js.LogPrefix();

		// Let's try that one more time.  We won't get back here because we toggled the value.
		js.startDefaultPrefix = false;
		// TODO ARM64: This crashes.
		//cleanSlate = true;
	}

	if (cleanSlate) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		Compile(em_address);
	}
}
|
|
|
|
// Enters jitted code by calling the generated `enterDispatcher` routine as a
// plain void() function. `globalticks` is not used by this implementation.
void Arm64Jit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");
	typedef void (*DispatcherEntry)();
	DispatcherEntry enter = (DispatcherEntry)enterDispatcher;
	enter();
}
|
|
|
|
// Returns the emulated address currently being compiled (js.compilerPC).
u32 Arm64Jit::GetCompilerPC() {
	return js.compilerPC;
}
|
|
|
|
// Reads the instruction `offset` instructions (4 bytes each) away from the
// current compiler PC; offset 1 is the instruction right after the current one.
MIPSOpcode Arm64Jit::GetOffsetInstruction(int offset) {
	const u32 address = GetCompilerPC() + 4 * offset;
	return Memory::Read_Instruction(address);
}
|
|
|
|
// Compiles one block of MIPS code starting at `em_address` into `b`.
//
// Steps: reset the per-block jit state, emit the optional "checked entry"
// header (a downcount flag check used when arriving from a linked block),
// compile instructions until something clears js.compiling, optionally log the
// result, flush the instruction cache and record the block's original size.
//
// Returns b->normalEntry, the entry point that skips the downcount check.
const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
	js.cancel = false;
	// Start at the address we were asked to compile. The block was allocated for
	// em_address and the logging below walks from em_address, so using mips_->pc
	// here would be wrong whenever the two differ.
	js.blockStart = em_address;
	js.compilerPC = em_address;
	js.lastContinuedPC = 0;
	js.initialBlockSize = 0;
	js.nextExit = 0;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.PrefixStart();

	// We add a downcount flag check before the block, used when entering from a linked block.
	// The last block decremented downcounter, and the flag should still be available.
	// Got three variants here of where we position the code, needs detailed benchmarking.

	FixupBranch bail;
	if (jo.useBackJump) {
		// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
		// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
		// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
		// large/slow construction of 32-bit immediates?
		const u8 *backJump = GetCodePtr();
		MOVI2R(SCRATCH1, js.blockStart);
		B((const void *)outerLoopPCInSCRATCH1);
		b->checkedEntry = (u8 *)GetCodePtr();
		B(CC_LT, backJump);
	} else if (jo.useForwardJump) {
		// The bail-out code is emitted after the block body; see SetJumpTarget(bail) below.
		b->checkedEntry = (u8 *)GetCodePtr();
		bail = B(CC_LT);
	} else if (jo.enableBlocklink) {
		b->checkedEntry = (u8 *)GetCodePtr();
		MOVI2R(SCRATCH1, js.blockStart);
		FixupBranch skip = B(CC_GE);
		B((const void *)outerLoopPCInSCRATCH1);
		SetJumpTarget(skip);
	} else {
		// No block linking, no need to add headers to blocks.
	}

	b->normalEntry = GetCodePtr();
	// TODO: this needs work
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	fpr.Start(analysis);

	js.numInstructions = 0;
	while (js.compiling) {
		gpr.SetCompilerPC(GetCompilerPC());  // Let it know for log messages
		MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());

		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst, this);

		js.compilerPC += 4;
		js.numInstructions++;

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS) {
			FlushAll();
			WriteExit(GetCompilerPC(), js.nextExit++);
			js.compiling = false;
		}
	}

	if (jo.useForwardJump) {
		// Target of the downcount check emitted at checkedEntry.
		SetJumpTarget(bail);
		gpr.SetRegImm(SCRATCH1, js.blockStart);
		B((const void *)outerLoopPCInSCRATCH1);
	}

	if (logBlocks > 0 && dontLogBlocks == 0) {
		// Scratch buffer for one disassembled MIPS instruction; only needed when logging.
		char temp[256];
		ILOG("=============== mips %d ===============", blocks.GetNumBlocks());
		for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, true);
			ILOG("M: %08x   %s", cpc, temp);
		}
	}

	b->codeSize = GetCodePtr() - b->normalEntry;
	if (logBlocks > 0 && dontLogBlocks == 0) {
		ILOG("=============== ARM (%d instructions -> %d bytes) ===============", js.numInstructions, b->codeSize);
		DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
	if (logBlocks > 0)
		logBlocks--;
	if (dontLogBlocks > 0)
		dontLogBlocks--;

	if (cpu_info.sBugs.bExynos8890Invalidation) {
		// What a waste.  If we don't do both this and over-invalidate, the device crashes.
		// This space won't ever get run, but it's wasted jit cache space.
		for (int i = 0; i < 32; ++i) {
			HINT(HINT_NOP);
		}
	}

	// Don't forget to zap the newly written instructions in the instruction cache!
	FlushIcache();

	if (js.lastContinuedPC == 0) {
		b->originalSize = js.numInstructions;
	} else {
		// We continued at least once.  Add the last proxy and set the originalSize correctly.
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
		b->originalSize = js.initialBlockSize;
	}

	return b->normalEntry;
}
|
|
|
|
// Records that compilation of the current block continues at `dest`.
// The first block is the root block; every later continuation gets a proxy
// block covering the range compiled since the previous continuation.
void Arm64Jit::AddContinuedBlock(u32 dest) {
	if (js.lastContinuedPC != 0) {
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
	} else {
		// First continuation: remember how many instructions the root part had.
		js.initialBlockSize = js.numInstructions;
	}
	js.lastContinuedPC = dest;
}
|
|
|
|
bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name) {
|
|
// Used in disassembly viewer.
|
|
if (ptr == applyRoundingMode)
|
|
name = "applyRoundingMode";
|
|
else if (ptr == updateRoundingMode)
|
|
name = "updateRoundingMode";
|
|
else if (ptr == dispatcher)
|
|
name = "dispatcher";
|
|
else if (ptr == dispatcherPCInSCRATCH1)
|
|
name = "dispatcher (PC in SCRATCH1)";
|
|
else if (ptr == dispatcherNoCheck)
|
|
name = "dispatcherNoCheck";
|
|
else if (ptr == enterDispatcher)
|
|
name = "enterDispatcher";
|
|
else if (ptr == restoreRoundingMode)
|
|
name = "restoreRoundingMode";
|
|
else if (ptr == saveStaticRegisters)
|
|
name = "saveStaticRegisters";
|
|
else if (ptr == loadStaticRegisters)
|
|
name = "loadStaticRegisters";
|
|
else {
|
|
u32 addr = blocks.GetAddressFromBlockPtr(ptr);
|
|
std::vector<int> numbers;
|
|
blocks.GetBlockNumbersFromAddress(addr, &numbers);
|
|
if (!numbers.empty()) {
|
|
const JitBlock *block = blocks.GetBlock(numbers[0]);
|
|
if (block) {
|
|
name = StringFromFormat("(block %d at %08x)", numbers[0], block->originalAddress);
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
// Only logs an error: this shouldn't be necessary, since the dispatcher should
// catch us before we ever get here.
void Arm64Jit::Comp_RunBlock(MIPSOpcode op) {
	ERROR_LOG(JIT, "Comp_RunBlock should never be reached!");
}
|
|
|
|
// Overwrites the code at `exitPoint` with a direct branch to `checkedEntry`
// and flushes the instruction cache for what was written.
void Arm64Jit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	ARM64XEmitter patcher(exitPoint);
	patcher.B(checkedEntry);
	// TODO: Write stuff after.
	patcher.FlushIcache();
}
|
|
|
|
// Overwrites the start of a block's checked entry so that anyone who tries to
// run the block is sent back to the dispatcher with PC = `originalAddress`.
// Not entirely ideal, but .. works. Spurious entrances from previously linked
// blocks can only come through checkedEntry.
void Arm64Jit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	ARM64XEmitter patcher(checkedEntry);
	// Store the block's original address as the MIPS pc, then leave via the dispatcher.
	patcher.MOVI2R(SCRATCH1, originalAddress);
	patcher.STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
	patcher.B(MIPSComp::jit->GetDispatcher());
	patcher.FlushIcache();
}
|
|
|
|
// Tries to replace a `jal dest` with a call to (or inlining of) a replacement
// function. Returns false when CanReplaceJalTo() rejects `dest`; otherwise the
// delay slot and the replacement are emitted, the compiler PC is advanced past
// the delay slot, and true is returned so compilation just keeps going.
//
// NOTE(review): when ARM64 is not defined the whole body is compiled out and
// this still returns true without emitting anything - confirm that is intended.
bool Arm64Jit::ReplaceJalTo(u32 dest) {
#ifdef ARM64
	const ReplacementTableEntry *entry = nullptr;
	u32 funcSize = 0;
	if (!CanReplaceJalTo(dest, &entry, &funcSize)) {
		return false;
	}

	// Warning - this might be bad if the code at the destination changes...
	const bool canInline = (entry->flags & REPFLAG_ALLOWINLINE) != 0;
	if (canInline) {
		// Jackpot!  Just do it, no flushing.  The code will be entirely inlined.
		// The delay slot goes first; it's unconditional so no issues.
		CompileDelaySlot(DELAYSLOT_NICE);
		// Technically, we should write the unused return address to RA, but meh.
		MIPSReplaceFunc inlineFunc = entry->jitReplaceFunc;
		// The jit replacement returns the cycle count to charge.
		js.downcountAmount += (this->*inlineFunc)();
	} else {
		gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		SaveStaticRegisters();
		RestoreRoundingMode();
		QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));
		ApplyRoundingMode();
		LoadStaticRegisters();
		// W0 is the return value from entry->replaceFunc.
		// Neither LoadStaticRegisters nor ApplyRoundingMode can trash it.
		WriteDownCountR(W0);
	}

	// Skip the delay slot we already compiled. No writing exits, keep going!
	js.compilerPC += 4;

	// Add a trigger so that if the inlined code changes, we invalidate this block.
	blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
#endif
	return true;
}
|
|
|
|
// Compiles a replacement-function marker op.
//
// We get here if we execute the first instruction of a replaced function,
// which means that we do need to return to RA. Inlined function calls (caught
// in jal) are handled differently.
//
// The replacement index is taken from the low bits of the op encoding. A
// disabled entry compiles the original instruction; hook entries run the
// replacement and then compile the original instruction; otherwise the
// replacement stands in for the whole function and the block exits to RA.
void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op) {
	const int funcIndex = op.encoding & MIPS_EMUHACK_VALUE_MASK;

	const ReplacementTableEntry *entry = GetReplacementFunc(funcIndex);
	if (!entry) {
		ERROR_LOG(HLE, "Invalid replacement op %08x", op.encoding);
		return;
	}

	if (entry->flags & REPFLAG_DISABLED) {
		MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
	} else if (entry->jitReplaceFunc) {
		// Jit-level replacement: it emits its own code and returns a cycle count.
		MIPSReplaceFunc jitFunc = entry->jitReplaceFunc;
		const int cycles = (this->*jitFunc)();

		const bool isHook = (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) != 0;
		if (isHook) {
			// Compile the original instruction at this address.  We ignore cycles for hooks.
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			FlushAll();
			// Everything is flushed, so SCRATCH1 is free to hold the return address.
			LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, MIPS_REG_RA * 4);
			js.downcountAmount += cycles;
			WriteExitDestInR(SCRATCH1);
			js.compiling = false;
		}
	} else if (entry->replaceFunc) {
		FlushAll();
		SaveStaticRegisters();
		RestoreRoundingMode();
		gpr.SetRegImm(SCRATCH1, GetCompilerPC());
		MovToPC(SCRATCH1);

		// Standard function call, nothing fancy.
		// The cycle count comes back in W0 (it is passed to WriteDownCountR below).
		QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));

		// Both outcomes start by restoring the jit's rounding mode and static registers.
		ApplyRoundingMode();
		LoadStaticRegisters();

		const bool isHook = (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) != 0;
		if (isHook) {
			// Compile the original instruction at this address.  We ignore cycles for hooks.
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			LDR(INDEX_UNSIGNED, W1, CTXREG, MIPS_REG_RA * 4);
			WriteDownCountR(W0);
			WriteExitDestInR(W1);
			js.compiling = false;
		}
	} else {
		ERROR_LOG(HLE, "Replacement function %s has neither jit nor regular impl", entry->name);
	}
}
|
|
|
|
// Fallback for ops without a jit implementation: flushes all cached state and
// emits a call to the interpreter function for `op` (if there is one), with the
// MIPS pc set to the current compiler PC and the op encoding in W0.
void Arm64Jit::Comp_Generic(MIPSOpcode op) {
	FlushAll();

	MIPSInterpretFunc interpFunc = MIPSGetInterpretFunc(op);
	if (interpFunc) {
		SaveStaticRegisters();
		// TODO: Perhaps keep the rounding mode for interp? Should probably, right?
		RestoreRoundingMode();
		MOVI2R(SCRATCH1, GetCompilerPC());
		MovToPC(SCRATCH1);
		MOVI2R(W0, op.encoding);
		QuickCallFunction(SCRATCH2_64, (void *)interpFunc);
		ApplyRoundingMode();
		LoadStaticRegisters();
	}

	// A prefix-using VFPU op that doesn't eat its prefix leaves the prefix state unknown.
	// If it does eat them, it'll happen in MIPSCompileOp().
	const MIPSInfo info = MIPSGetInfo(op);
	const bool usesVfpuPrefix = (info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0;
	if (usesVfpuPrefix && (info & OUT_EAT_PREFIX) == 0) {
		js.PrefixUnknown();
	}
}
|
|
|
|
// Emits a load of MIPSState::pc (relative to CTXREG) into register `r`.
void Arm64Jit::MovFromPC(ARM64Reg r) {
	LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
|
|
|
|
// Emits a store of register `r` into MIPSState::pc (relative to CTXREG).
void Arm64Jit::MovToPC(ARM64Reg r) {
	STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}
|
|
|
|
// Should not really be necessary except when entering Advance
|
|
void Arm64Jit::SaveStaticRegisters() {
|
|
if (jo.useStaticAlloc) {
|
|
QuickCallFunction(SCRATCH2_64, saveStaticRegisters);
|
|
} else {
|
|
// Inline the single operation
|
|
STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
|
|
}
|
|
}
|
|
|
|
void Arm64Jit::LoadStaticRegisters() {
|
|
if (jo.useStaticAlloc) {
|
|
QuickCallFunction(SCRATCH2_64, loadStaticRegisters);
|
|
} else {
|
|
LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
|
|
}
|
|
}
|
|
|
|
void Arm64Jit::WriteDownCount(int offset, bool updateFlags) {
|
|
int theDowncount = js.downcountAmount + offset;
|
|
if (updateFlags) {
|
|
SUBSI2R(DOWNCOUNTREG, DOWNCOUNTREG, theDowncount, SCRATCH1);
|
|
} else {
|
|
SUBI2R(DOWNCOUNTREG, DOWNCOUNTREG, theDowncount, SCRATCH1);
|
|
}
|
|
}
|
|
|
|
// Emits a subtraction of register `reg` from DOWNCOUNTREG, using the
// flag-setting form when `updateFlags` is true.
void Arm64Jit::WriteDownCountR(ARM64Reg reg, bool updateFlags) {
	if (!updateFlags) {
		SUB(DOWNCOUNTREG, DOWNCOUNTREG, reg);
		return;
	}
	SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
}
|
|
|
|
// Destroys SCRATCH2
|
|
void Arm64Jit::RestoreRoundingMode(bool force) {
|
|
// If the game has never set an interesting rounding mode, we can safely skip this.
|
|
if (force || js.hasSetRounding) {
|
|
QuickCallFunction(SCRATCH2_64, restoreRoundingMode);
|
|
}
|
|
}
|
|
|
|
// Destroys SCRATCH1 and SCRATCH2
|
|
void Arm64Jit::ApplyRoundingMode(bool force) {
|
|
// If the game has never set an interesting rounding mode, we can safely skip this.
|
|
if (force || js.hasSetRounding) {
|
|
QuickCallFunction(SCRATCH2_64, applyRoundingMode);
|
|
}
|
|
}
|
|
|
|
// Destroys SCRATCH1 and SCRATCH2
|
|
void Arm64Jit::UpdateRoundingMode() {
|
|
QuickCallFunction(SCRATCH2_64, updateRoundingMode);
|
|
}
|
|
|
|
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
|
|
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
|
|
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
|
|
// I don't think this gives us that much benefit.
|
|
void Arm64Jit::WriteExit(u32 destination, int exit_num) {
|
|
WriteDownCount();
|
|
//If nobody has taken care of this yet (this can be removed when all branches are done)
|
|
JitBlock *b = js.curBlock;
|
|
b->exitAddress[exit_num] = destination;
|
|
b->exitPtrs[exit_num] = GetWritableCodePtr();
|
|
|
|
// Link opportunity!
|
|
int block = blocks.GetBlockNumberFromStartAddress(destination);
|
|
if (block >= 0 && jo.enableBlocklink) {
|
|
// The target block exists! Directly link to its checked entrypoint.
|
|
B(blocks.GetBlock(block)->checkedEntry);
|
|
b->linkStatus[exit_num] = true;
|
|
} else {
|
|
MOVI2R(SCRATCH1, destination);
|
|
B((const void *)dispatcherPCInSCRATCH1);
|
|
}
|
|
}
|
|
|
|
// Emits a block exit whose destination is held in register `Reg`: the register
// is stored to the MIPS pc, the downcount is charged, and control branches to
// the dispatcher.
void Arm64Jit::WriteExitDestInR(ARM64Reg Reg) {
	MovToPC(Reg);
	WriteDownCount();
	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}
|
|
|
|
// Emits a block exit used after a syscall: charges the downcount and branches
// to the dispatcherCheckCoreState routine.
void Arm64Jit::WriteSyscallExit() {
	WriteDownCount();
	B((const void *)dispatcherCheckCoreState);
}
|
|
|
|
// Compile handler that emits nothing for `op`.
void Arm64Jit::Comp_DoNothing(MIPSOpcode op) {
}
|
|
|
|
// If `op` is an emuhack op that maps to a known block, returns that block's
// original first instruction; otherwise returns `op` unchanged.
MIPSOpcode Arm64Jit::GetOriginalOp(MIPSOpcode op) {
	JitBlockCache *cache = GetBlockCache();
	const int blockIndex = cache->GetBlockNumberFromEmuHackOp(op, true);
	if (blockIndex < 0)
		return op;
	return cache->GetOriginalFirstOp(blockIndex);
}
|
|
|
|
} // namespace
|