mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-28 16:00:58 +00:00
a1f5c537d4
More x86jit micro optimizations for the FPU
532 lines
14 KiB
C++
532 lines
14 KiB
C++
// Copyright (c) 2012- PPSSPP Project.
|
|
|
|
// This program is free software: you can redistribute it and/or modify
|
|
// it under the terms of the GNU General Public License as published by
|
|
// the Free Software Foundation, version 2.0 or later versions.
|
|
|
|
// This program is distributed in the hope that it will be useful,
|
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
// GNU General Public License 2.0 for more details.
|
|
|
|
// A copy of the GPL 2.0 should have been included with the program.
|
|
// If not, see http://www.gnu.org/licenses/
|
|
|
|
// Official git repository and contact information can be found at
|
|
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
|
|
|
|
#include "Core/Config.h"
|
|
#include "Core/Debugger/Breakpoints.h"
|
|
#include "Core/MemMap.h"
|
|
#include "Core/MIPS/JitCommon/JitCommon.h"
|
|
#include "Core/MIPS/x86/Jit.h"
|
|
#include "Core/MIPS/x86/JitSafeMem.h"
|
|
#include "Core/System.h"
|
|
|
|
namespace MIPSComp
|
|
{
|
|
using namespace Gen;
|
|
using namespace X64JitConstants;
|
|
|
|
// Callback handed to CallProtectedFunction() by the memcheck emitters in this file.
// addr/size describe the access; isWrite == 1 means a write, any other value a read.
void JitMemCheck(u32 addr, int size, int isWrite)
{
	// The breakpoint we're resuming from must not fire a second time.
	const bool skipThisOne = CBreakPoints::CheckSkipFirst() == currentMIPS->pc;
	if (skipThisOne)
		return;

	// Only report while the core is still running; otherwise we already hit one.
	if (coreState == CORE_RUNNING || coreState == CORE_NEXTFRAME)
		CBreakPoints::ExecMemCheckJitBefore(addr, isWrite == 1, size, currentMIPS->pc);
}
|
|
|
|
void JitMemCheckCleanup()
|
|
{
|
|
CBreakPoints::ExecMemCheckJitCleanup();
|
|
}
|
|
|
|
// Sets up a safe memory access helper for the address raddr + offset.
// alignMask is ANDed onto the effective address (0xFFFFFFFF means no forced alignment).
JitSafeMem::JitSafeMem(Jit *jit, MIPSGPReg raddr, s32 offset, u32 alignMask)
	: jit_(jit), raddr_(raddr), offset_(offset), needsCheck_(false), needsSkip_(false), alignMask_(alignMask)
{
	// Bad-access handling and memchecks both add instructions, so play it safe
	// and request far jumps whenever either is in effect.
	if (!g_Config.bIgnoreBadMemAccess)
		far_ = true;
	else
		far_ = !CBreakPoints::GetMemChecks().empty();

	// For a known address, drop the kernel RAM bit up front; otherwise we'd end up
	// with a negative offset to MEMBASEREG.  (u32) -1 marks "not an immediate".
	iaddr_ = jit_->gpr.IsImm(raddr_)
		? ((jit_->gpr.GetImm(raddr_) + offset_) & 0x7FFFFFFF)
		: (u32) -1;

	fast_ = g_Config.bFastMemory || raddr == MIPS_REG_SP;

	// When raddr_ is about to be used again anyway, map it now for more optimal code.
	// The register is assumed to be locked already.
	const int LOOKAHEAD_OPS = 3;
	if (!jit_->gpr.R(raddr_).IsImm())
	{
		if (MIPSAnalyst::IsRegisterUsed(raddr_, jit_->GetCompilerPC() + 4, LOOKAHEAD_OPS))
			jit_->gpr.MapReg(raddr_, true, false);
	}
}
|
|
|
|
void JitSafeMem::SetFar()
|
|
{
|
|
_dbg_assert_msg_(JIT, !needsSkip_, "Sorry, you need to call SetFar() earlier.");
|
|
far_ = true;
|
|
}
|
|
|
|
// Prepares the fast path of a write of `size` bytes and stores the destination operand in dest.
// Returns false only for an immediate address that is not valid (dest is left untouched then).
bool JitSafeMem::PrepareWrite(OpArg &dest, int size)
{
	size_ = size;

	// Register address: the write can always be emitted (conditionally.)
	if (iaddr_ == (u32) -1)
	{
		dest = PrepareMemoryOpArg(MEM_WRITE);
		return true;
	}

	// Immediate address: only writable directly when it is valid.
	if (!ImmValid())
		return false;

	MemCheckImm(MEM_WRITE);

#ifdef _M_IX86
	dest = M(Memory::base + (iaddr_ & Memory::MEMVIEW32_MASK & alignMask_));
#else
	dest = MDisp(MEMBASEREG, iaddr_ & alignMask_);
#endif
	return true;
}
|
|
|
|
// Prepares the fast path of a read of `size` bytes and stores the source operand in src.
// Returns false only for an immediate address that is not valid (src is left untouched then).
bool JitSafeMem::PrepareRead(OpArg &src, int size)
{
	size_ = size;

	// Register address: the read can always be emitted (conditionally.)
	if (iaddr_ == (u32) -1)
	{
		src = PrepareMemoryOpArg(MEM_READ);
		return true;
	}

	// Immediate address: only readable directly when it is valid.
	if (!ImmValid())
		return false;

	MemCheckImm(MEM_READ);

#ifdef _M_IX86
	src = M(Memory::base + (iaddr_ & Memory::MEMVIEW32_MASK & alignMask_));
#else
	src = MDisp(MEMBASEREG, iaddr_ & alignMask_);
#endif
	return true;
}
|
|
|
|
// Returns the fast-path operand for the address `suboffset` bytes past the prepared one.
OpArg JitSafeMem::NextFastAddress(int suboffset)
{
	// Register address: reuse xaddr_ and fold the extra displacement into the operand.
	if (iaddr_ == (u32) -1)
	{
		_dbg_assert_msg_(JIT, (suboffset & alignMask_) == suboffset, "suboffset must be aligned");

#ifdef _M_IX86
		return MDisp(xaddr_, (u32) Memory::base + offset_ + suboffset);
#else
		return MComplex(MEMBASEREG, xaddr_, SCALE_1, offset_ + suboffset);
#endif
	}

	// Immediate address: compute the final (aligned) address right here.
	const u32 immAddr = (iaddr_ + suboffset) & alignMask_;

#ifdef _M_IX86
	return M(Memory::base + (immAddr & Memory::MEMVIEW32_MASK));
#else
	return MDisp(MEMBASEREG, immAddr);
#endif
}
|
|
|
|
// Emits the address setup for a register-based access and returns the memory operand
// for the fast path.  Chooses xaddr_, emits memchecks, then either the RAM range checks
// (safe mode, recording tooLow_/tooHigh_/safe_) or the 32-bit mask (fast mode on x86-32).
OpArg JitSafeMem::PrepareMemoryOpArg(MemoryOpType type)
{
	const bool mustAlign = alignMask_ != 0xFFFFFFFF;

	// EAX is only needed as a scratch copy when the address gets modified.
	bool useScratch = mustAlign;

#ifdef _M_IX86
	bool needMask = true; // raddr_ != MIPS_REG_SP;    // Commented out this speedhack due to low impact
	// On 32 bit, fast memory mode always masks the address.
	useScratch = useScratch || (fast_ && needMask);
#endif

	if (jit_->gpr.R(raddr_).IsSimpleReg() && !useScratch)
	{
		// Already in a host register and left unmodified: use it directly.
		jit_->gpr.MapReg(raddr_, true, false);
		xaddr_ = jit_->gpr.RX(raddr_);
	}
	else
	{
		jit_->MOV(32, R(EAX), jit_->gpr.R(raddr_));
		xaddr_ = EAX;
	}

	MemCheckAsm(type);

	if (fast_)
	{
#ifdef _M_IX86
		if (needMask) {
			jit_->AND(32, R(EAX), Imm32(Memory::MEMVIEW32_MASK));
		}
#endif
	}
	else
	{
		// Is it in physical ram?
		jit_->CMP(32, R(xaddr_), Imm32(PSP_GetKernelMemoryBase() - offset_));
		tooLow_ = jit_->J_CC(CC_B);
		jit_->CMP(32, R(xaddr_), Imm32(PSP_GetUserMemoryEnd() - offset_ - (size_ - 1)));
		tooHigh_ = jit_->J_CC(CC_AE);

		// PrepareSlowAccess() may need to jump back up here.
		safe_ = jit_->GetCodePtr();
	}

	// TODO: This could be more optimal, but the common case is that we want xaddr_ not to include offset_.
	// The alignment has to be applied after adding the offset, so add, mask, and subtract again.
	if (mustAlign)
	{
		jit_->ADD(32, R(xaddr_), Imm32(offset_));
		jit_->AND(32, R(xaddr_), Imm32(alignMask_));
		jit_->SUB(32, R(xaddr_), Imm32(offset_));
	}

#ifdef _M_IX86
	return MDisp(xaddr_, (u32) Memory::base + offset_);
#else
	return MComplex(MEMBASEREG, xaddr_, SCALE_1, offset_);
#endif
}
|
|
|
|
void JitSafeMem::PrepareSlowAccess()
|
|
{
|
|
// Skip the fast path (which the caller wrote just now.)
|
|
skip_ = jit_->J(far_);
|
|
needsSkip_ = true;
|
|
jit_->SetJumpTarget(tooLow_);
|
|
jit_->SetJumpTarget(tooHigh_);
|
|
|
|
// Might also be the scratchpad.
|
|
jit_->CMP(32, R(xaddr_), Imm32(PSP_GetScratchpadMemoryBase() - offset_));
|
|
FixupBranch tooLow = jit_->J_CC(CC_B);
|
|
jit_->CMP(32, R(xaddr_), Imm32(PSP_GetScratchpadMemoryEnd() - offset_ - (size_ - 1)));
|
|
jit_->J_CC(CC_B, safe_);
|
|
jit_->SetJumpTarget(tooLow);
|
|
}
|
|
|
|
bool JitSafeMem::PrepareSlowWrite()
|
|
{
|
|
// If it's immediate, we only need a slow write on invalid.
|
|
if (iaddr_ != (u32) -1)
|
|
return !fast_ && !ImmValid();
|
|
|
|
if (!fast_)
|
|
{
|
|
PrepareSlowAccess();
|
|
return true;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
void JitSafeMem::DoSlowWrite(const void *safeFunc, const OpArg& src, int suboffset)
|
|
{
|
|
if (iaddr_ != (u32) -1)
|
|
jit_->MOV(32, R(EAX), Imm32((iaddr_ + suboffset) & alignMask_));
|
|
else
|
|
{
|
|
jit_->LEA(32, EAX, MDisp(xaddr_, offset_ + suboffset));
|
|
if (alignMask_ != 0xFFFFFFFF)
|
|
jit_->AND(32, R(EAX), Imm32(alignMask_));
|
|
}
|
|
|
|
#ifdef _M_IX86
|
|
jit_->PUSH(EDX);
|
|
#endif
|
|
if (!src.IsSimpleReg(EDX)) {
|
|
jit_->MOV(32, R(EDX), src);
|
|
}
|
|
if (!g_Config.bIgnoreBadMemAccess) {
|
|
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->GetCompilerPC()));
|
|
}
|
|
// This is a special jit-ABI'd function.
|
|
jit_->CALL(safeFunc);
|
|
#ifdef _M_IX86
|
|
jit_->POP(EDX);
|
|
#endif
|
|
needsCheck_ = true;
|
|
}
|
|
|
|
bool JitSafeMem::PrepareSlowRead(const void *safeFunc)
|
|
{
|
|
if (!fast_)
|
|
{
|
|
if (iaddr_ != (u32) -1)
|
|
{
|
|
// No slow read necessary.
|
|
if (ImmValid())
|
|
return false;
|
|
jit_->MOV(32, R(EAX), Imm32(iaddr_ & alignMask_));
|
|
}
|
|
else
|
|
{
|
|
PrepareSlowAccess();
|
|
jit_->LEA(32, EAX, MDisp(xaddr_, offset_));
|
|
if (alignMask_ != 0xFFFFFFFF)
|
|
jit_->AND(32, R(EAX), Imm32(alignMask_));
|
|
}
|
|
|
|
if (!g_Config.bIgnoreBadMemAccess) {
|
|
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->GetCompilerPC()));
|
|
}
|
|
// This is a special jit-ABI'd function.
|
|
jit_->CALL(safeFunc);
|
|
needsCheck_ = true;
|
|
return true;
|
|
}
|
|
else
|
|
return false;
|
|
}
|
|
|
|
void JitSafeMem::NextSlowRead(const void *safeFunc, int suboffset)
|
|
{
|
|
_dbg_assert_msg_(JIT, !fast_, "NextSlowRead() called in fast memory mode?");
|
|
|
|
// For simplicity, do nothing for 0. We already read in PrepareSlowRead().
|
|
if (suboffset == 0)
|
|
return;
|
|
|
|
if (jit_->gpr.IsImm(raddr_))
|
|
{
|
|
_dbg_assert_msg_(JIT, !Memory::IsValidAddress(iaddr_ + suboffset), "NextSlowRead() for an invalid immediate address?");
|
|
|
|
jit_->MOV(32, R(EAX), Imm32((iaddr_ + suboffset) & alignMask_));
|
|
}
|
|
// For GPR, if xaddr_ was the dest register, this will be wrong. Don't use in GPR.
|
|
else
|
|
{
|
|
jit_->LEA(32, EAX, MDisp(xaddr_, offset_ + suboffset));
|
|
if (alignMask_ != 0xFFFFFFFF)
|
|
jit_->AND(32, R(EAX), Imm32(alignMask_));
|
|
}
|
|
|
|
if (!g_Config.bIgnoreBadMemAccess) {
|
|
jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->GetCompilerPC()));
|
|
}
|
|
// This is a special jit-ABI'd function.
|
|
jit_->CALL(safeFunc);
|
|
}
|
|
|
|
bool JitSafeMem::ImmValid()
|
|
{
|
|
return iaddr_ != (u32) -1 && Memory::IsValidAddress(iaddr_) && Memory::IsValidAddress(iaddr_ + size_ - 1);
|
|
}
|
|
|
|
void JitSafeMem::Finish()
|
|
{
|
|
// Memory::Read_U32/etc. may have tripped coreState.
|
|
if (needsCheck_ && !g_Config.bIgnoreBadMemAccess)
|
|
jit_->js.afterOp |= JitState::AFTER_CORE_STATE;
|
|
if (needsSkip_)
|
|
jit_->SetJumpTarget(skip_);
|
|
for (auto it = skipChecks_.begin(), end = skipChecks_.end(); it != end; ++it)
|
|
jit_->SetJumpTarget(*it);
|
|
}
|
|
|
|
// Emits a memcheck call for an immediate address, if a memcheck covering
// [iaddr_, iaddr_ + size_) exists and applies to this kind of access.
void JitSafeMem::MemCheckImm(MemoryOpType type)
{
	MemCheck *check = CBreakPoints::GetMemCheck(iaddr_, size_);
	if (!check)
		return;

	// Ignore memchecks that don't watch this direction.
	if (type == MEM_READ && !(check->cond & MEMCHECK_READ))
		return;
	if (type == MEM_WRITE && !(check->cond & MEMCHECK_WRITE))
		return;

	jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->GetCompilerPC()));
	jit_->CallProtectedFunction(&JitMemCheck, iaddr_, size_, type == MEM_WRITE ? 1 : 0);

	// CORE_RUNNING is <= CORE_NEXTFRAME.
	jit_->CMP(32, M(&coreState), Imm32(CORE_NEXTFRAME));
	skipChecks_.push_back(jit_->J_CC(CC_G, true));
	jit_->js.afterOp |= JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE | JitState::AFTER_MEMCHECK_CLEANUP;
}
|
|
|
|
// Emits runtime memcheck tests for a register address: for every memcheck range that
// applies to this kind of access, compare xaddr_ against it and call JitMemCheck on a hit.
void JitSafeMem::MemCheckAsm(MemoryOpType type)
{
	const auto memchecks = CBreakPoints::GetMemCheckRanges();
	bool emittedAny = false;

	for (const auto &check : memchecks)
	{
		// Ignore memchecks that don't watch this direction.
		if (type == MEM_READ && !(check.cond & MEMCHECK_READ))
			continue;
		if (type == MEM_WRITE && !(check.cond & MEMCHECK_WRITE))
			continue;

		emittedAny = true;

		// end == 0 denotes a single-address check rather than a range.
		const bool isRange = check.end != 0;

		FixupBranch skipNext, skipNextRange;
		if (isRange)
		{
			jit_->CMP(32, R(xaddr_), Imm32(check.start - offset_ - size_));
			skipNext = jit_->J_CC(CC_BE);
			jit_->CMP(32, R(xaddr_), Imm32(check.end - offset_));
			skipNextRange = jit_->J_CC(CC_AE);
		}
		else
		{
			jit_->CMP(32, R(xaddr_), Imm32(check.start - offset_));
			skipNext = jit_->J_CC(CC_NE);
		}

		// Keep the stack 16-byte aligned, just PUSH/POP 4 times.
		for (int pushes = 0; pushes < 4; ++pushes)
			jit_->PUSH(xaddr_);
		jit_->MOV(32, M(&jit_->mips_->pc), Imm32(jit_->GetCompilerPC()));
		// The callback wants the effective address; xaddr_ gets restored by the POPs.
		jit_->ADD(32, R(xaddr_), Imm32(offset_));
		jit_->CallProtectedFunction(&JitMemCheck, R(xaddr_), size_, type == MEM_WRITE ? 1 : 0);
		for (int pops = 0; pops < 4; ++pops)
			jit_->POP(xaddr_);

		jit_->SetJumpTarget(skipNext);
		if (isRange)
			jit_->SetJumpTarget(skipNextRange);
	}

	if (!emittedAny)
		return;

	// CORE_RUNNING is <= CORE_NEXTFRAME.
	jit_->CMP(32, M(&coreState), Imm32(CORE_NEXTFRAME));
	skipChecks_.push_back(jit_->J_CC(CC_G, true));
	jit_->js.afterOp |= JitState::AFTER_CORE_STATE | JitState::AFTER_REWIND_PC_BAD_STATE | JitState::AFTER_MEMCHECK_CLEANUP;
}
|
|
|
|
// Amount of code space JitSafeMemFuncs::Init() requests through AllocCodeSpace()
// for the generated read/write functions (presumably in bytes -- confirm against AllocCodeSpace).
static const int FUNCS_ARENA_SIZE = 512 * 1024;
|
|
|
|
// Allocates the code arena and generates the six safe access functions, recording
// each one's entry point just before it is emitted.
void JitSafeMemFuncs::Init(ThunkManager *thunks) {
	using namespace Gen;

	AllocCodeSpace(FUNCS_ARENA_SIZE);
	thunks_ = thunks;

	// Readers, widest first.
	readU32 = GetCodePtr();
	CreateReadFunc(32, (const void *)&Memory::Read_U32);

	readU16 = GetCodePtr();
	CreateReadFunc(16, (const void *)&Memory::Read_U16);

	readU8 = GetCodePtr();
	CreateReadFunc(8, (const void *)&Memory::Read_U8);

	// Writers, widest first.
	writeU32 = GetCodePtr();
	CreateWriteFunc(32, (const void *)&Memory::Write_U32);

	writeU16 = GetCodePtr();
	CreateWriteFunc(16, (const void *)&Memory::Write_U16);

	writeU8 = GetCodePtr();
	CreateWriteFunc(8, (const void *)&Memory::Write_U8);
}
|
|
|
|
// Resets the code pointer and then releases the code space allocated in Init().
void JitSafeMemFuncs::Shutdown() {
	ResetCodePtr();
	FreeCodeSpace();
}
|
|
|
|
// Mini ABI:
|
|
// Read funcs take address in EAX, return in RAX.
|
|
// Write funcs take address in EAX, data in RDX.
|
|
// On x86-32, Write funcs also have an extra 4 bytes on the stack.
|
|
|
|
// Generates one read function: address in EAX, zero-extended result returned in EAX.
// `bits` is the access width; fallbackFunc is called when the address fails CheckDirectEAX().
void JitSafeMemFuncs::CreateReadFunc(int bits, const void *fallbackFunc) {
	CheckDirectEAX();

	// Fallback path.  Since we were CALLed, we need to align the stack before calling C++.
#ifdef _M_IX86
	SUB(32, R(ESP), Imm8(16 - 4));
#else
	SUB(64, R(RSP), Imm8(0x28));
#endif
	ABI_CallFunctionA(thunks_->ProtectFunction(fallbackFunc, 1), R(EAX));
#ifdef _M_IX86
	ADD(32, R(ESP), Imm8(16 - 4));
#else
	ADD(64, R(RSP), Imm8(0x28));
#endif

	RET();

	// Direct path: the successful range checks from CheckDirectEAX() land here.
	StartDirectAccess();

#ifdef _M_IX86
	MOVZX(32, bits, EAX, MDisp(EAX, (u32)Memory::base));
#else
	MOVZX(32, bits, EAX, MRegSum(MEMBASEREG, EAX));
#endif

	RET();
}
|
|
|
|
// Generates one write function: address in EAX, value in EDX.
// `bits` is the access width; fallbackFunc is called when the address fails CheckDirectEAX().
void JitSafeMemFuncs::CreateWriteFunc(int bits, const void *fallbackFunc) {
	CheckDirectEAX();

	// Fallback path.  Since we were CALLed, we need to align the stack before calling C++.
#ifdef _M_IX86
	// 4 for return, 4 for saved reg on stack.
	SUB(32, R(ESP), Imm8(16 - 4 - 4));
#else
	SUB(64, R(RSP), Imm8(0x28));
#endif
	ABI_CallFunctionAA(thunks_->ProtectFunction(fallbackFunc, 2), R(EDX), R(EAX));
#ifdef _M_IX86
	ADD(32, R(ESP), Imm8(16 - 4 - 4));
#else
	ADD(64, R(RSP), Imm8(0x28));
#endif

	RET();

	// Direct path: the successful range checks from CheckDirectEAX() land here.
	StartDirectAccess();

#ifdef _M_IX86
	MOV(bits, MDisp(EAX, (u32)Memory::base), R(EDX));
#else
	MOV(bits, MRegSum(MEMBASEREG, EAX), R(EDX));
#endif

	RET();
}
|
|
|
|
void JitSafeMemFuncs::CheckDirectEAX() {
|
|
// Clear any cache/kernel bits.
|
|
AND(32, R(EAX), Imm32(0x3FFFFFFF));
|
|
|
|
CMP(32, R(EAX), Imm32(PSP_GetUserMemoryEnd()));
|
|
FixupBranch tooHighRAM = J_CC(CC_AE);
|
|
CMP(32, R(EAX), Imm32(PSP_GetKernelMemoryBase()));
|
|
skips_.push_back(J_CC(CC_AE));
|
|
|
|
CMP(32, R(EAX), Imm32(PSP_GetVidMemEnd()));
|
|
FixupBranch tooHighVid = J_CC(CC_AE);
|
|
CMP(32, R(EAX), Imm32(PSP_GetVidMemBase()));
|
|
skips_.push_back(J_CC(CC_AE));
|
|
|
|
CMP(32, R(EAX), Imm32(PSP_GetScratchpadMemoryEnd()));
|
|
FixupBranch tooHighScratch = J_CC(CC_AE);
|
|
CMP(32, R(EAX), Imm32(PSP_GetScratchpadMemoryBase()));
|
|
skips_.push_back(J_CC(CC_AE));
|
|
|
|
SetJumpTarget(tooHighRAM);
|
|
SetJumpTarget(tooHighVid);
|
|
SetJumpTarget(tooHighScratch);
|
|
}
|
|
|
|
void JitSafeMemFuncs::StartDirectAccess() {
|
|
for (auto it = skips_.begin(), end = skips_.end(); it != end; ++it) {
|
|
SetJumpTarget(*it);
|
|
}
|
|
skips_.clear();
|
|
}
|
|
|
|
};
|