Merge pull request #6762 from unknownbrackets/fpu-rounding

Handle the FPU rounding mode, at least in the JITs
This commit is contained in:
Henrik Rydgård 2014-08-23 10:43:22 +02:00
commit b7da82eebb
14 changed files with 150 additions and 21 deletions

View File

@ -324,6 +324,7 @@ static ConfigSetting cpuSettings[] = {
ConfigSetting("FastMemoryAccess", &g_Config.bFastMemory, true),
ReportedConfigSetting("FuncReplacements", &g_Config.bFuncReplacements, true),
ReportedConfigSetting("CPUSpeed", &g_Config.iLockedCPUSpeed, 0),
ReportedConfigSetting("SetRoundingMode", &g_Config.bSetRoundingMode, true),
ConfigSetting(false),
};

View File

@ -86,6 +86,7 @@ public:
bool bCheckForNewVersion;
bool bForceLagSync;
bool bFuncReplacements;
bool bSetRoundingMode;
// Definitely cannot be changed while game is running.
bool bSeparateCPUThread;

View File

@ -114,7 +114,9 @@ void Jit::GenerateFixedCode()
MovToPC(R0);
outerLoop = GetCodePtr();
SaveDowncount();
ClearRoundingMode();
QuickCallFunction(R0, &CoreTiming::Advance);
SetRoundingMode();
RestoreDowncount();
FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time
@ -173,7 +175,9 @@ void Jit::GenerateFixedCode()
// No block found, let's jit
SaveDowncount();
ClearRoundingMode();
QuickCallFunction(R2, (void *)&JitAt);
SetRoundingMode();
RestoreDowncount();
B(dispatcherNoCheck); // no point in special casing this
@ -195,6 +199,7 @@ void Jit::GenerateFixedCode()
}
SaveDowncount();
ClearRoundingMode();
ADD(R_SP, R_SP, 4);

View File

@ -538,6 +538,7 @@ void Jit::Comp_Syscall(MIPSOpcode op)
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDownCount(offset);
ClearRoundingMode();
js.downcountAmount = -offset;
// TODO: Maybe discard v0, v1, and some temps? Definitely at?
@ -558,6 +559,7 @@ void Jit::Comp_Syscall(MIPSOpcode op)
QuickCallFunction(R1, (void *)&CallSyscall);
}
RestoreDowncount();
SetRoundingMode();
WriteSyscallExit();
js.compiling = false;

View File

@ -50,6 +50,7 @@ namespace MIPSComp
void Jit::Comp_FPU3op(MIPSOpcode op)
{
CONDITIONAL_DISABLE;
SetRoundingMode();
int ft = _FT;
int fs = _FS;
@ -190,6 +191,9 @@ void Jit::Comp_FPULS(MIPSOpcode op)
void Jit::Comp_FPUComp(MIPSOpcode op) {
CONDITIONAL_DISABLE;
// TODO: Does this matter here?
SetRoundingMode();
int opc = op & 0xF;
if (opc >= 8) opc -= 8; // alias
if (opc == 0) { // f, sf (signalling false)
@ -279,6 +283,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
VNEG(fpr.R(fd), fpr.R(fs));
break;
case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
ClearRoundingMode();
fpr.MapDirtyIn(fd, fs);
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
break;
@ -293,9 +298,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
break;
case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
{
ClearRoundingMode();
fpr.MapDirtyIn(fd, fs);
VMRS(SCRATCHREG2);
// Assume we're always in round-to-zero mode.
// Assume we're always in round-to-nearest mode.
ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(1 << 22));
VMSR(SCRATCHREG1);
VCMP(fpr.R(fs), fpr.R(fs));
@ -310,9 +316,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
}
case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
{
ClearRoundingMode();
fpr.MapDirtyIn(fd, fs);
VMRS(SCRATCHREG2);
// Assume we're always in round-to-zero mode.
// Assume we're always in round-to-nearest mode.
ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(2 << 22));
VMSR(SCRATCHREG1);
VCMP(fpr.R(fs), fpr.R(fs));
@ -331,30 +338,13 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
break;
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
fpr.MapDirtyIn(fd, fs);
LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31));
AND(SCRATCHREG1, SCRATCHREG1, Operand2(3));
// MIPS Rounding Mode: ARM Rounding Mode
// 0: Round nearest 0
// 1: Round to zero 3
// 2: Round up (ceil) 1
// 3: Round down (floor) 2
CMP(SCRATCHREG1, Operand2(1));
SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2));
SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1));
SetCC(CC_AL);
VMRS(SCRATCHREG2);
// Assume we're always in round-to-zero mode beforehand.
ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22));
VMSR(SCRATCHREG1);
SetRoundingMode();
VCMP(fpr.R(fs), fpr.R(fs));
VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED);
VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags).
SetCC(CC_VS);
MOVIU2F(fpr.R(fd), 0x7FFFFFFF, SCRATCHREG1);
SetCC(CC_AL);
// Set the rounding mode back. TODO: Keep it? Dirty?
VMSR(SCRATCHREG2);
break;
default:
DISABLE;

View File

@ -18,6 +18,7 @@
#include "base/logging.h"
#include "Common/ChunkFile.h"
#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/MemMap.h"
@ -465,10 +466,13 @@ void Jit::Comp_Generic(MIPSOpcode op)
if (func)
{
SaveDowncount();
// TODO: Perhaps keep the rounding mode for interp?
ClearRoundingMode();
gpr.SetRegImm(SCRATCHREG1, js.compilerPC);
MovToPC(SCRATCHREG1);
gpr.SetRegImm(R0, op.encoding);
QuickCallFunction(R1, (void *)func);
SetRoundingMode();
RestoreDowncount();
}
@ -540,6 +544,40 @@ void Jit::WriteDownCountR(ARMReg reg)
}
}
// Emits ARM code that resets the FPSCR rounding mode field (bits 22-23) to 0,
// which is round-to-nearest. Emits nothing when g_Config.bSetRoundingMode is
// disabled. The emitted code overwrites SCRATCHREG1 and SCRATCHREG2.
void Jit::ClearRoundingMode()
{
	if (!g_Config.bSetRoundingMode)
		return;

	// Read FPSCR, mask out the two rounding mode bits, and write it back.
	VMRS(SCRATCHREG2);
	BIC(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(3 << 22));
	VMSR(SCRATCHREG1);
}
void Jit::SetRoundingMode()
{
if (g_Config.bSetRoundingMode)
{
LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31));
AND(SCRATCHREG1, SCRATCHREG1, Operand2(3));
// MIPS Rounding Mode: ARM Rounding Mode
// 0: Round nearest 0
// 1: Round to zero 3
// 2: Round up (ceil) 1
// 3: Round down (floor) 2
CMP(SCRATCHREG1, Operand2(1));
SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2));
SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1));
SetCC(CC_AL);
VMRS(SCRATCHREG2);
// Assume we're always in round-to-nearest mode beforehand.
ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22));
VMSR(SCRATCHREG1);
}
}
// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,

View File

@ -190,6 +190,8 @@ private:
void WriteDownCount(int offset = 0);
void WriteDownCountR(ARMReg reg);
void ClearRoundingMode();
void SetRoundingMode();
void MovFromPC(ARMReg r);
void MovToPC(ARMReg r);

View File

@ -77,7 +77,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
#endif
outerLoop = GetCodePtr();
jit->ClearRoundingMode(this);
ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));
jit->SetRoundingMode(this);
FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time
dispatcherCheckCoreState = GetCodePtr();
@ -129,7 +131,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
SetJumpTarget(notfound);
//Ok, no block, let's jit
jit->ClearRoundingMode(this);
ABI_CallFunction(&Jit);
jit->SetRoundingMode(this);
JMP(dispatcherNoCheck); // Let's just dispatch again, we'll enter the block since we know it's there.
SetJumpTarget(bail);
@ -139,10 +143,12 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit)
J_CC(CC_Z, outerLoop, true);
SetJumpTarget(badCoreState);
jit->ClearRoundingMode(this);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
breakpointBailout = GetCodePtr();
jit->ClearRoundingMode(this);
ABI_PopAllCalleeSavedRegsAndAdjustStack();
RET();
}

View File

@ -681,6 +681,7 @@ void Jit::Comp_Syscall(MIPSOpcode op)
// If we're in a delay slot, this is off by one.
const int offset = js.inDelaySlot ? -1 : 0;
WriteDowncount(offset);
ClearRoundingMode();
js.downcountAmount = -offset;
// Skip the CallSyscall where possible.
@ -690,6 +691,7 @@ void Jit::Comp_Syscall(MIPSOpcode op)
else
ABI_CallFunctionC(&CallSyscall, op.encoding);
SetRoundingMode();
WriteSyscallExit();
js.compiling = false;
}

View File

@ -291,10 +291,32 @@ void Jit::Comp_FPU2op(MIPSOpcode op) {
MOVSS(fpr.R(fd), XMM0);
break;
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
{
fpr.SpillLock(fs, fd);
fpr.StoreFromRegister(fd);
CVTSS2SI(EAX, fpr.R(fs));
// Did we get an indefinite integer value?
CMP(32, R(EAX), Imm32(0x80000000));
FixupBranch skip = J_CC(CC_NE);
MOVSS(XMM0, fpr.R(fs));
XORPS(XMM1, R(XMM1));
CMPSS(XMM0, R(XMM1), CMP_LT);
// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.
// We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits.
MOVD_xmm(R(EAX), XMM0);
XOR(32, R(EAX), Imm32(0x7fffffff));
SetJumpTarget(skip);
MOV(32, fpr.R(fd), R(EAX));
}
break;
case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s
case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s
case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s
case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s
default:
DISABLE;
return;
@ -357,6 +379,7 @@ void Jit::Comp_mxc1(MIPSOpcode op)
case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1
if (fs == 31) {
ClearRoundingMode();
if (gpr.IsImm(rt)) {
gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1);
MOV(32, M(&mips_->fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF));

View File

@ -211,6 +211,49 @@ void Jit::WriteDowncount(int offset)
SUB(32, M(&currentMIPS->downcount), downcount > 127 ? Imm32(downcount) : Imm8(downcount));
}
// Emits x86 code that resets the MXCSR rounding mode bits (bits 13-14) to 0.
// Emits nothing when g_Config.bSetRoundingMode is disabled.
//
// emitter: where to emit the code; NULL (the default) means this Jit itself.
// The emitted code uses currentMIPS->temp as scratch memory.
void Jit::ClearRoundingMode(XEmitter *emitter)
{
	if (!g_Config.bSetRoundingMode)
		return;

	XEmitter *target = emitter;
	if (target == NULL)
		target = this;

	// Everything except the two rounding mode bits.
	const int keepMask = ~(3 << 13);

	// MXCSR can only be stored to / loaded from memory, so go through temp.
	target->STMXCSR(M(&currentMIPS->temp));
	target->AND(32, M(&currentMIPS->temp), Imm32(keepMask));
	target->LDMXCSR(M(&currentMIPS->temp));
}
// Emits x86 code that applies the guest's rounding mode (the low two bits of
// mips_->fcr31) to the MXCSR rounding mode bits (bits 13-14).
// Emits nothing when g_Config.bSetRoundingMode is disabled.
//
// emitter: where to emit the code; NULL (the default) means this Jit itself.
//
// The emitted code preserves EAX (it is saved and restored on the stack), uses
// currentMIPS->temp as scratch memory, and changes the CPU flags.
// It ORs the mode into MXCSR, so it expects the rounding bits to be 0
// beforehand - see ClearRoundingMode().
void Jit::SetRoundingMode(XEmitter *emitter)
{
	if (!g_Config.bSetRoundingMode)
		return;

	if (emitter == NULL)
		emitter = this;

	// EAX is our scratch register, but this is emitted directly after calls
	// whose return value is still needed in EAX (replacement func cycle counts,
	// the JitBreakpoint result), so it must survive this sequence.
	emitter->PUSH(EAX);

	emitter->MOV(32, R(EAX), M(&mips_->fcr31));
	emitter->AND(32, R(EAX), Imm8(3));
	// If it's 0, we don't actually bother setting.  This is the most common.
	// We always use nearest as the default rounding mode.
	FixupBranch skip = emitter->J_CC(CC_Z);

	emitter->STMXCSR(M(&currentMIPS->temp));

	// The MIPS bits don't correspond exactly, so we have to adjust.
	// 0 -> 0 (skip), 1 -> 3, 2 -> 2 (skip2), 3 -> 1
	emitter->CMP(32, R(EAX), Imm8(2));
	FixupBranch skip2 = emitter->J_CC(CC_Z);
	// Flipping bit 1 swaps 1 and 3.
	emitter->XOR(32, R(EAX), Imm8(2));
	emitter->SetJumpTarget(skip2);

	emitter->SHL(32, R(EAX), Imm8(13));
	emitter->OR(32, M(&currentMIPS->temp), R(EAX));
	emitter->LDMXCSR(M(&currentMIPS->temp));

	// Both paths meet here so the PUSH above is always balanced.
	emitter->SetJumpTarget(skip);
	emitter->POP(EAX);
}
void Jit::ClearCache()
{
blocks.Clear();
@ -442,7 +485,9 @@ bool Jit::ReplaceJalTo(u32 dest) {
CompileDelaySlot(DELAYSLOT_NICE);
FlushAll();
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
ClearRoundingMode();
ABI_CallFunction(entry->replaceFunc);
SetRoundingMode();
SUB(32, M(&currentMIPS->downcount), R(EAX));
}
@ -492,7 +537,9 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op)
// Standard function call, nothing fancy.
// The function returns the number of cycles it took in EAX.
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
ClearRoundingMode();
ABI_CallFunction(entry->replaceFunc);
SetRoundingMode();
if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
// Compile the original instruction at this address. We ignore cycles for hooks.
@ -516,11 +563,14 @@ void Jit::Comp_Generic(MIPSOpcode op)
if (func)
{
// TODO: Maybe we'd be better off keeping the rounding mode within interp?
ClearRoundingMode();
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
if (USE_JIT_MISSMAP)
ABI_CallFunctionC(&JitLogMiss, op.encoding);
else
ABI_CallFunctionC(func, op.encoding);
SetRoundingMode();
}
else
ERROR_LOG_REPORT(JIT, "Trying to compile instruction %08x that can't be interpreted", op.encoding);
@ -628,7 +678,9 @@ void Jit::WriteSyscallExit()
{
WriteDowncount();
if (js.afterOp & JitState::AFTER_MEMCHECK_CLEANUP) {
ClearRoundingMode();
ABI_CallFunction(&JitMemCheckCleanup);
SetRoundingMode();
}
JMP(asm_.dispatcherCheckCoreState, true);
}
@ -640,7 +692,9 @@ bool Jit::CheckJitBreakpoint(u32 addr, int downcountOffset)
SAVE_FLAGS;
FlushAll();
MOV(32, M(&mips_->pc), Imm32(js.compilerPC));
ClearRoundingMode();
ABI_CallFunction(&JitBreakpoint);
SetRoundingMode();
// If 0, the conditional breakpoint wasn't taken.
CMP(32, R(EAX), Imm32(0));

View File

@ -164,6 +164,9 @@ public:
void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
void EatPrefix() { js.EatPrefix(); }
void ClearRoundingMode(XEmitter *emitter = NULL);
void SetRoundingMode(XEmitter *emitter = NULL);
JitBlockCache *GetBlockCache() { return &blocks; }
AsmRoutineManager &Asm() { return asm_; }

View File

@ -375,6 +375,7 @@ void GameSettingsScreen::CreateViews() {
#ifndef MOBILE_DEVICE
systemSettings->Add(new PopupSliderChoice(&g_Config.iRewindFlipFrequency, 0, 1800, s->T("Rewind Snapshot Frequency", "Rewind Snapshot Frequency (0 = off, mem hog)"), screenManager()));
#endif
systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting);
systemSettings->Add(new CheckBox(&g_Config.bAtomicAudioLocks, s->T("Atomic Audio locks (experimental)")))->SetEnabled(!PSP_IsInited());
#if defined(USING_WIN_UI)

View File

@ -346,6 +346,7 @@ int main(int argc, const char* argv[])
g_Config.bSoftwareSkinning = true;
g_Config.bVertexDecoderJit = true;
g_Config.bBlockTransferGPU = true;
g_Config.bSetRoundingMode = true;
#ifdef _WIN32
InitSysDirectories();