diff --git a/Core/Config.cpp b/Core/Config.cpp index 73135f1a8..7782dfada 100644 --- a/Core/Config.cpp +++ b/Core/Config.cpp @@ -324,6 +324,7 @@ static ConfigSetting cpuSettings[] = { ConfigSetting("FastMemoryAccess", &g_Config.bFastMemory, true), ReportedConfigSetting("FuncReplacements", &g_Config.bFuncReplacements, true), ReportedConfigSetting("CPUSpeed", &g_Config.iLockedCPUSpeed, 0), + ReportedConfigSetting("SetRoundingMode", &g_Config.bSetRoundingMode, true), ConfigSetting(false), }; diff --git a/Core/Config.h b/Core/Config.h index 9a87a7c95..3afc93ff1 100644 --- a/Core/Config.h +++ b/Core/Config.h @@ -86,6 +86,7 @@ public: bool bCheckForNewVersion; bool bForceLagSync; bool bFuncReplacements; + bool bSetRoundingMode; // Definitely cannot be changed while game is running. bool bSeparateCPUThread; diff --git a/Core/MIPS/ARM/ArmAsm.cpp b/Core/MIPS/ARM/ArmAsm.cpp index cc24c8596..d3c15afd1 100644 --- a/Core/MIPS/ARM/ArmAsm.cpp +++ b/Core/MIPS/ARM/ArmAsm.cpp @@ -114,7 +114,9 @@ void Jit::GenerateFixedCode() MovToPC(R0); outerLoop = GetCodePtr(); SaveDowncount(); + ClearRoundingMode(); QuickCallFunction(R0, &CoreTiming::Advance); + SetRoundingMode(); RestoreDowncount(); FixupBranch skipToRealDispatch = B(); //skip the sync and compare first time @@ -173,7 +175,9 @@ void Jit::GenerateFixedCode() // No block found, let's jit SaveDowncount(); + ClearRoundingMode(); QuickCallFunction(R2, (void *)&JitAt); + SetRoundingMode(); RestoreDowncount(); B(dispatcherNoCheck); // no point in special casing this @@ -195,6 +199,7 @@ void Jit::GenerateFixedCode() } SaveDowncount(); + ClearRoundingMode(); ADD(R_SP, R_SP, 4); diff --git a/Core/MIPS/ARM/ArmCompBranch.cpp b/Core/MIPS/ARM/ArmCompBranch.cpp index 77270651a..2bdcc151d 100644 --- a/Core/MIPS/ARM/ArmCompBranch.cpp +++ b/Core/MIPS/ARM/ArmCompBranch.cpp @@ -538,6 +538,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) // If we're in a delay slot, this is off by one. const int offset = js.inDelaySlot ? 
-1 : 0; WriteDownCount(offset); + ClearRoundingMode(); js.downcountAmount = -offset; // TODO: Maybe discard v0, v1, and some temps? Definitely at? @@ -558,6 +559,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) QuickCallFunction(R1, (void *)&CallSyscall); } RestoreDowncount(); + SetRoundingMode(); WriteSyscallExit(); js.compiling = false; diff --git a/Core/MIPS/ARM/ArmCompFPU.cpp b/Core/MIPS/ARM/ArmCompFPU.cpp index 58b777f35..f1efb1d4e 100644 --- a/Core/MIPS/ARM/ArmCompFPU.cpp +++ b/Core/MIPS/ARM/ArmCompFPU.cpp @@ -50,6 +50,7 @@ namespace MIPSComp void Jit::Comp_FPU3op(MIPSOpcode op) { CONDITIONAL_DISABLE; + SetRoundingMode(); int ft = _FT; int fs = _FS; @@ -190,6 +191,9 @@ void Jit::Comp_FPULS(MIPSOpcode op) void Jit::Comp_FPUComp(MIPSOpcode op) { CONDITIONAL_DISABLE; + // TODO: Does this matter here? + SetRoundingMode(); + int opc = op & 0xF; if (opc >= 8) opc -= 8; // alias if (opc == 0) { // f, sf (signalling false) @@ -279,6 +283,7 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { VNEG(fpr.R(fd), fpr.R(fs)); break; case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); break; @@ -293,9 +298,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s { + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode. + // Assume we're always in round-to-nearest mode. ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(1 << 22)); VMSR(SCRATCHREG1); VCMP(fpr.R(fs), fpr.R(fs)); @@ -310,9 +316,10 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { } case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s { + ClearRoundingMode(); fpr.MapDirtyIn(fd, fs); VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode. + // Assume we're always in round-to-nearest mode. 
ORR(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(2 << 22)); VMSR(SCRATCHREG1); VCMP(fpr.R(fs), fpr.R(fs)); @@ -331,30 +338,13 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { break; case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s fpr.MapDirtyIn(fd, fs); - LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); - AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); - // MIPS Rounding Mode: ARM Rounding Mode - // 0: Round nearest 0 - // 1: Round to zero 3 - // 2: Round up (ceil) 1 - // 3: Round down (floor) 2 - CMP(SCRATCHREG1, Operand2(1)); - SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); - SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); - SetCC(CC_AL); - - VMRS(SCRATCHREG2); - // Assume we're always in round-to-zero mode beforehand. - ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); - VMSR(SCRATCHREG1); + SetRoundingMode(); VCMP(fpr.R(fs), fpr.R(fs)); VCVT(fpr.R(fd), fpr.R(fs), TO_INT | IS_SIGNED); VMRS_APSR(); // Move FP flags from FPSCR to APSR (regular flags). SetCC(CC_VS); MOVIU2F(fpr.R(fd), 0x7FFFFFFF, SCRATCHREG1); SetCC(CC_AL); - // Set the rounding mode back. TODO: Keep it? Dirty? - VMSR(SCRATCHREG2); break; default: DISABLE; diff --git a/Core/MIPS/ARM/ArmJit.cpp b/Core/MIPS/ARM/ArmJit.cpp index 95118fcf1..7a7a186e8 100644 --- a/Core/MIPS/ARM/ArmJit.cpp +++ b/Core/MIPS/ARM/ArmJit.cpp @@ -18,6 +18,7 @@ #include "base/logging.h" #include "Common/ChunkFile.h" #include "Core/Reporting.h" +#include "Core/Config.h" #include "Core/Core.h" #include "Core/CoreTiming.h" #include "Core/MemMap.h" @@ -465,10 +466,13 @@ void Jit::Comp_Generic(MIPSOpcode op) if (func) { SaveDowncount(); + // TODO: Perhaps keep the rounding mode for interp? 
+ ClearRoundingMode(); gpr.SetRegImm(SCRATCHREG1, js.compilerPC); MovToPC(SCRATCHREG1); gpr.SetRegImm(R0, op.encoding); QuickCallFunction(R1, (void *)func); + SetRoundingMode(); RestoreDowncount(); } @@ -540,6 +544,40 @@ void Jit::WriteDownCountR(ARMReg reg) } } +void Jit::ClearRoundingMode() +{ + if (g_Config.bSetRoundingMode) + { + VMRS(SCRATCHREG2); + // Clear the rounding mode bits back to 0 (round-to-nearest). + BIC(SCRATCHREG1, SCRATCHREG2, AssumeMakeOperand2(3 << 22)); + VMSR(SCRATCHREG1); + } +} + +void Jit::SetRoundingMode() +{ + if (g_Config.bSetRoundingMode) + { + LDR(SCRATCHREG1, CTXREG, offsetof(MIPSState, fcr31)); + AND(SCRATCHREG1, SCRATCHREG1, Operand2(3)); + // MIPS Rounding Mode: ARM Rounding Mode + // 0: Round nearest 0 + // 1: Round to zero 3 + // 2: Round up (ceil) 1 + // 3: Round down (floor) 2 + CMP(SCRATCHREG1, Operand2(1)); + SetCC(CC_EQ); ADD(SCRATCHREG1, SCRATCHREG1, Operand2(2)); + SetCC(CC_GT); SUB(SCRATCHREG1, SCRATCHREG1, Operand2(1)); + SetCC(CC_AL); + + VMRS(SCRATCHREG2); + // Assume we're always in round-to-nearest mode beforehand. + ORR(SCRATCHREG1, SCRATCHREG2, Operand2(SCRATCHREG1, ST_LSL, 22)); + VMSR(SCRATCHREG1); + } +} + // IDEA - could have a WriteDualExit that takes two destinations and two condition flags, // and just have conditional that set PC "twice". This only works when we fall back to dispatcher // though, as we need to have the SUBS flag set in the end.
So with block linking in the mix, diff --git a/Core/MIPS/ARM/ArmJit.h b/Core/MIPS/ARM/ArmJit.h index 35fc3b83a..5fd96a4ef 100644 --- a/Core/MIPS/ARM/ArmJit.h +++ b/Core/MIPS/ARM/ArmJit.h @@ -190,6 +190,8 @@ private: void WriteDownCount(int offset = 0); void WriteDownCountR(ARMReg reg); + void ClearRoundingMode(); + void SetRoundingMode(); void MovFromPC(ARMReg r); void MovToPC(ARMReg r); diff --git a/Core/MIPS/x86/Asm.cpp b/Core/MIPS/x86/Asm.cpp index 0fdf46431..59c453389 100644 --- a/Core/MIPS/x86/Asm.cpp +++ b/Core/MIPS/x86/Asm.cpp @@ -77,7 +77,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) #endif outerLoop = GetCodePtr(); + jit->ClearRoundingMode(this); ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance)); + jit->SetRoundingMode(this); FixupBranch skipToRealDispatch = J(); //skip the sync and compare first time dispatcherCheckCoreState = GetCodePtr(); @@ -129,7 +131,9 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) SetJumpTarget(notfound); //Ok, no block, let's jit + jit->ClearRoundingMode(this); ABI_CallFunction(&Jit); + jit->SetRoundingMode(this); JMP(dispatcherNoCheck); // Let's just dispatch again, we'll enter the block since we know it's there. SetJumpTarget(bail); @@ -139,10 +143,12 @@ void AsmRoutineManager::Generate(MIPSState *mips, MIPSComp::Jit *jit) J_CC(CC_Z, outerLoop, true); SetJumpTarget(badCoreState); + jit->ClearRoundingMode(this); ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); breakpointBailout = GetCodePtr(); + jit->ClearRoundingMode(this); ABI_PopAllCalleeSavedRegsAndAdjustStack(); RET(); } \ No newline at end of file diff --git a/Core/MIPS/x86/CompBranch.cpp b/Core/MIPS/x86/CompBranch.cpp index 2a2db6a44..83ab2b298 100644 --- a/Core/MIPS/x86/CompBranch.cpp +++ b/Core/MIPS/x86/CompBranch.cpp @@ -681,6 +681,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) // If we're in a delay slot, this is off by one. const int offset = js.inDelaySlot ?
-1 : 0; WriteDowncount(offset); + ClearRoundingMode(); js.downcountAmount = -offset; // Skip the CallSyscall where possible. @@ -690,6 +691,7 @@ void Jit::Comp_Syscall(MIPSOpcode op) else ABI_CallFunctionC(&CallSyscall, op.encoding); + SetRoundingMode(); WriteSyscallExit(); js.compiling = false; } diff --git a/Core/MIPS/x86/CompFPU.cpp b/Core/MIPS/x86/CompFPU.cpp index 54bff6728..48fdca49b 100644 --- a/Core/MIPS/x86/CompFPU.cpp +++ b/Core/MIPS/x86/CompFPU.cpp @@ -291,10 +291,32 @@ void Jit::Comp_FPU2op(MIPSOpcode op) { MOVSS(fpr.R(fd), XMM0); break; + case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s + { + fpr.SpillLock(fs, fd); + fpr.StoreFromRegister(fd); + CVTSS2SI(EAX, fpr.R(fs)); + + // Did we get an indefinite integer value? + CMP(32, R(EAX), Imm32(0x80000000)); + FixupBranch skip = J_CC(CC_NE); + MOVSS(XMM0, fpr.R(fs)); + XORPS(XMM1, R(XMM1)); + CMPSS(XMM0, R(XMM1), CMP_LT); + + // At this point, -inf = 0xffffffff, inf/nan = 0x00000000. + // We want -inf to be 0x80000000 and inf/nan to be 0x7fffffff, so we flip those bits. + MOVD_xmm(R(EAX), XMM0); + XOR(32, R(EAX), Imm32(0x7fffffff)); + + SetJumpTarget(skip); + MOV(32, fpr.R(fd), R(EAX)); + } + break; + case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s - case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s default: DISABLE; return; @@ -357,6 +379,7 @@ void Jit::Comp_mxc1(MIPSOpcode op) case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1 if (fs == 31) { + ClearRoundingMode(); if (gpr.IsImm(rt)) { gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1); MOV(32, M(&mips_->fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF)); diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp index 74cfd97a6..4f5cf9820 100644 --- a/Core/MIPS/x86/Jit.cpp +++ b/Core/MIPS/x86/Jit.cpp @@ -211,6 +211,49 @@ void Jit::WriteDowncount(int offset) SUB(32, M(&currentMIPS->downcount), downcount > 127 ?
Imm32(downcount) : Imm8(downcount)); } +void Jit::ClearRoundingMode(XEmitter *emitter) +{ + if (g_Config.bSetRoundingMode) + { + if (emitter == NULL) + emitter = this; + emitter->STMXCSR(M(&currentMIPS->temp)); + // Clear the rounding mode bits back to 0. + emitter->AND(32, M(&currentMIPS->temp), Imm32(~(3 << 13))); + emitter->LDMXCSR(M(&currentMIPS->temp)); + } +} + +void Jit::SetRoundingMode(XEmitter *emitter) +{ + if (g_Config.bSetRoundingMode) + { + if (emitter == NULL) + emitter = this; + emitter->MOV(32, R(EAX), M(&mips_->fcr31)); + emitter->AND(32, R(EAX), Imm8(3)); + + // If it's 0, we don't actually bother setting. This is the most common. + // We always use nearest as the default rounding mode. + FixupBranch skip = emitter->J_CC(CC_Z); + + emitter->STMXCSR(M(&currentMIPS->temp)); + + // The MIPS bits don't correspond exactly, so we have to adjust. + // 0 -> 0 (skip), 1 -> 3, 2 -> 2 (skip2), 3 -> 1 + emitter->CMP(32, R(EAX), Imm8(2)); + FixupBranch skip2 = emitter->J_CC(CC_Z); + emitter->XOR(32, R(EAX), Imm8(2)); + emitter->SetJumpTarget(skip2); + + emitter->SHL(32, R(EAX), Imm8(13)); + emitter->OR(32, M(&currentMIPS->temp), R(EAX)); + emitter->LDMXCSR(M(&currentMIPS->temp)); + + emitter->SetJumpTarget(skip); + } +} + void Jit::ClearCache() { blocks.Clear(); @@ -442,7 +485,9 @@ bool Jit::ReplaceJalTo(u32 dest) { CompileDelaySlot(DELAYSLOT_NICE); FlushAll(); MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); + ClearRoundingMode(); ABI_CallFunction(entry->replaceFunc); + SetRoundingMode(); SUB(32, M(&currentMIPS->downcount), R(EAX)); } @@ -492,7 +537,9 @@ void Jit::Comp_ReplacementFunc(MIPSOpcode op) // Standard function call, nothing fancy. // The function returns the number of cycles it took in EAX. MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); + ClearRoundingMode(); ABI_CallFunction(entry->replaceFunc); + SetRoundingMode(); if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) { // Compile the original instruction at this address. We ignore cycles for hooks.
@@ -516,11 +563,14 @@ void Jit::Comp_Generic(MIPSOpcode op) if (func) { + // TODO: Maybe we'd be better off keeping the rounding mode within interp? + ClearRoundingMode(); MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); if (USE_JIT_MISSMAP) ABI_CallFunctionC(&JitLogMiss, op.encoding); else ABI_CallFunctionC(func, op.encoding); + SetRoundingMode(); } else ERROR_LOG_REPORT(JIT, "Trying to compile instruction %08x that can't be interpreted", op.encoding); @@ -628,7 +678,9 @@ void Jit::WriteSyscallExit() { WriteDowncount(); if (js.afterOp & JitState::AFTER_MEMCHECK_CLEANUP) { + ClearRoundingMode(); ABI_CallFunction(&JitMemCheckCleanup); + SetRoundingMode(); } JMP(asm_.dispatcherCheckCoreState, true); } @@ -640,7 +692,9 @@ bool Jit::CheckJitBreakpoint(u32 addr, int downcountOffset) SAVE_FLAGS; FlushAll(); MOV(32, M(&mips_->pc), Imm32(js.compilerPC)); + ClearRoundingMode(); ABI_CallFunction(&JitBreakpoint); + SetRoundingMode(); // If 0, the conditional breakpoint wasn't taken. CMP(32, R(EAX), Imm32(0)); diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h index 605015703..44ee34f92 100644 --- a/Core/MIPS/x86/Jit.h +++ b/Core/MIPS/x86/Jit.h @@ -164,6 +164,9 @@ public: void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg); void EatPrefix() { js.EatPrefix(); } + void ClearRoundingMode(XEmitter *emitter = NULL); + void SetRoundingMode(XEmitter *emitter = NULL); + JitBlockCache *GetBlockCache() { return &blocks; } AsmRoutineManager &Asm() { return asm_; } diff --git a/UI/GameSettingsScreen.cpp b/UI/GameSettingsScreen.cpp index d0ad767b3..7ee4e2537 100644 --- a/UI/GameSettingsScreen.cpp +++ b/UI/GameSettingsScreen.cpp @@ -375,6 +375,7 @@ void GameSettingsScreen::CreateViews() { #ifndef MOBILE_DEVICE systemSettings->Add(new PopupSliderChoice(&g_Config.iRewindFlipFrequency, 0, 1800, s->T("Rewind Snapshot Frequency", "Rewind Snapshot Frequency (0 = off, mem hog)"), screenManager())); #endif + systemSettings->Add(new CheckBox(&g_Config.bSetRoundingMode, s->T("Respect 
FPU rounding (disable for old GEB saves)")))->OnClick.Handle(this, &GameSettingsScreen::OnJitAffectingSetting); systemSettings->Add(new CheckBox(&g_Config.bAtomicAudioLocks, s->T("Atomic Audio locks (experimental)")))->SetEnabled(!PSP_IsInited()); #if defined(USING_WIN_UI) diff --git a/headless/Headless.cpp b/headless/Headless.cpp index db2b4c691..58ba4d6ac 100644 --- a/headless/Headless.cpp +++ b/headless/Headless.cpp @@ -346,6 +346,7 @@ int main(int argc, const char* argv[]) g_Config.bSoftwareSkinning = true; g_Config.bVertexDecoderJit = true; g_Config.bBlockTransferGPU = true; + g_Config.bSetRoundingMode = true; #ifdef _WIN32 InitSysDirectories();