diff --git a/Common/BitScan.h b/Common/BitScan.h index 729c320759..4a8a8c28ac 100644 --- a/Common/BitScan.h +++ b/Common/BitScan.h @@ -1,6 +1,7 @@ #pragma once #include "ppsspp_config.h" +#include #if PPSSPP_PLATFORM(WINDOWS) #include "Common/CommonWindows.h" diff --git a/Core/Compatibility.cpp b/Core/Compatibility.cpp index 71f67ebe4a..4763cf4c25 100644 --- a/Core/Compatibility.cpp +++ b/Core/Compatibility.cpp @@ -65,6 +65,7 @@ void Compatibility::CheckSettings(IniFile &iniFile, const std::string &gameID) { CheckSetting(iniFile, gameID, "ForceMax60FPS", &flags_.ForceMax60FPS); CheckSetting(iniFile, gameID, "JitInvalidationHack", &flags_.JitInvalidationHack); CheckSetting(iniFile, gameID, "HideISOFiles", &flags_.HideISOFiles); + CheckSetting(iniFile, gameID, "MoreAccurateVMMUL", &flags_.MoreAccurateVMMUL); } void Compatibility::CheckSetting(IniFile &iniFile, const std::string &gameID, const char *option, bool *flag) { diff --git a/Core/Compatibility.h b/Core/Compatibility.h index c85267cf46..8632813e26 100644 --- a/Core/Compatibility.h +++ b/Core/Compatibility.h @@ -65,6 +65,7 @@ struct CompatFlags { bool ForceMax60FPS; bool JitInvalidationHack; bool HideISOFiles; + bool MoreAccurateVMMUL; }; class IniFile; diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp index 4893820081..94cd0d811c 100644 --- a/Core/MIPS/ARM/ArmCompVFPU.cpp +++ b/Core/MIPS/ARM/ArmCompVFPU.cpp @@ -21,14 +21,16 @@ #include #include "math/math_util.h" +#include "Core/Compatibility.h" +#include "Core/Config.h" #include "Core/MemMap.h" +#include "Core/Reporting.h" +#include "Core/System.h" #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSCodeUtils.h" #include "Common/CPUDetect.h" -#include "Core/Config.h" -#include "Core/Reporting.h" #include "Core/MIPS/ARM/ArmJit.h" #include "Core/MIPS/ARM/ArmRegCache.h" @@ -1468,12 +1470,16 @@ namespace MIPSComp void ArmJit::Comp_Vmmul(MIPSOpcode op) { CONDITIONAL_DISABLE(VFPU_MTX_VMMUL); - if (js.HasUnknownPrefix()) { + if (!js.HasNoPrefix()) { DISABLE; } NEON_IF_AVAILABLE(CompNEON_Vmmul); - // TODO: This probably ignores prefixes? + if (PSP_CoreParameter().compat.flags().MoreAccurateVMMUL) { + // Fall back to interpreter, which has the accurate implementation. + // Later we might do something more optimized here. + DISABLE; + } MatrixSize sz = GetMtxSize(op); int n = GetMatrixSide(sz); diff --git a/Core/MIPS/ARM64/Arm64CompVFPU.cpp b/Core/MIPS/ARM64/Arm64CompVFPU.cpp index 243890f1ea..69d6a302c6 100644 --- a/Core/MIPS/ARM64/Arm64CompVFPU.cpp +++ b/Core/MIPS/ARM64/Arm64CompVFPU.cpp @@ -21,15 +21,16 @@ #include #include "math/math_util.h" +#include "Core/Compatibility.h" +#include "Core/Config.h" #include "Core/MemMap.h" +#include "Core/Reporting.h" +#include "Core/System.h" #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSTables.h" #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSCodeUtils.h" #include "Common/CPUDetect.h" -#include "Core/Config.h" -#include "Core/Reporting.h" - #include "Common/Arm64Emitter.h" #include "Core/MIPS/ARM64/Arm64Jit.h" #include "Core/MIPS/ARM64/Arm64RegCache.h" @@ -1219,6 +1220,12 @@ namespace MIPSComp { DISABLE; } + if (PSP_CoreParameter().compat.flags().MoreAccurateVMMUL) { + // Fall back to interpreter, which has the accurate implementation. + // Later we might do something more optimized here. + DISABLE; + } + MatrixSize sz = GetMtxSize(op); int n = GetMatrixSide(sz); diff --git a/Core/MIPS/IR/IRCompVFPU.cpp b/Core/MIPS/IR/IRCompVFPU.cpp index fe69d4f7e1..13fa39b2c2 100644 --- a/Core/MIPS/IR/IRCompVFPU.cpp +++ b/Core/MIPS/IR/IRCompVFPU.cpp @@ -20,6 +20,7 @@ #include "math/math_util.h" #include "Common/CPUDetect.h" +#include "Core/Compatibility.h" #include "Core/Config.h" #include "Core/MemMap.h" #include "Core/MIPS/MIPS.h" @@ -29,6 +30,7 @@ #include "Core/MIPS/IR/IRFrontend.h" #include "Core/MIPS/IR/IRRegCache.h" #include "Core/Reporting.h" +#include "Core/System.h" // All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly. @@ -1242,6 +1244,12 @@ namespace MIPSComp { DISABLE; } + if (PSP_CoreParameter().compat.flags().MoreAccurateVMMUL) { + // Fall back to interpreter, which has the accurate implementation. + // Later we might do something more optimized here. + DISABLE; + } + // Matrix multiply (weird prefixes) // D[0 .. N, 0 .. M] = S[0 .. N, 0 .. M]' * T[0 .. N, 0 .. M] // Note: Behaves as if it's implemented through a series of vdots. diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp index cf5edba112..8e3ad0ba01 100644 --- a/Core/MIPS/MIPSIntVFPU.cpp +++ b/Core/MIPS/MIPSIntVFPU.cpp @@ -23,9 +23,11 @@ #include "math/math_util.h" +#include "Core/Compatibility.h" #include "Core/Core.h" -#include "Core/Reporting.h" #include "Core/MemMap.h" +#include "Core/Reporting.h" +#include "Core/System.h" #include "Core/MIPS/MIPS.h" #include "Core/MIPS/MIPSInt.h" @@ -467,6 +469,8 @@ namespace MIPSInt ReadMatrix(s, sz, vs); ReadMatrix(t, sz, vt); + // TODO: Always use the more accurate path in interpreter? + bool useAccurateDot = USE_VFPU_DOT || PSP_CoreParameter().compat.flags().MoreAccurateVMMUL; for (int a = 0; a < n; a++) { for (int b = 0; b < n; b++) { union { float f; uint32_t u; } sum = { 0.0f }; @@ -476,7 +480,7 @@ namespace MIPSInt ApplySwizzleT(&t[a * 4], V_Quad); } - if (USE_VFPU_DOT) { + if (useAccurateDot) { sum.f = vfpu_dot(&s[b * 4], &t[a * 4]); if (my_isnan(sum.f)) { sum.u = 0x7f800001; diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp index 0b87ef5ea0..ff7bd79f2b 100644 --- a/Core/MIPS/x86/CompVFPU.cpp +++ b/Core/MIPS/x86/CompVFPU.cpp @@ -29,9 +29,11 @@ #include "math/math_util.h" #include "Common/CPUDetect.h" -#include "Core/MemMap.h" +#include "Core/Compatibility.h" #include "Core/Config.h" +#include "Core/MemMap.h" #include "Core/Reporting.h" +#include "Core/System.h" #include "Core/MIPS/MIPSAnalyst.h" #include "Core/MIPS/MIPSCodeUtils.h" #include "Core/MIPS/MIPSVFPUUtils.h" @@ -2803,10 +2805,15 @@ void Jit::Comp_VScl(MIPSOpcode op) { void Jit::Comp_Vmmul(MIPSOpcode op) { CONDITIONAL_DISABLE(VFPU_MTX_VMMUL); - - // TODO: This probably ignores prefixes? - if (js.HasUnknownPrefix()) + if (!js.HasNoPrefix()) { DISABLE; + } + + if (PSP_CoreParameter().compat.flags().MoreAccurateVMMUL) { + // Fall back to interpreter, which has the accurate implementation. + // Later we might do something more optimized here. + DISABLE; + } MatrixSize sz = GetMtxSize(op); VectorSize vsz = GetVectorSize(sz); diff --git a/assets/compat.ini b/assets/compat.ini index 15aee5a550..74cd2406d5 100644 --- a/assets/compat.ini +++ b/assets/compat.ini @@ -620,3 +620,9 @@ NPJH50471 = true ULJM06033 = true NPJH50559 = true NPEH00030 = true + +[MoreAccurateVMMUL] +# Fixes leg shaking in Tekken 6. The potential for slowdown in other games is large enough +# that we will not generally apply this accurate mode where not needed. +ULES01376 = true +ULUS10466 = true