PowerPC: Fixed a moronic bug in mcrfs. Together with correctly computing FPRF in a few ops (a new option, which slows things down slightly), this fixes most of the remaining math errors in Super Monkey Ball and eliminates the need for the HLE hacks. Make sure to have up-to-date gameconfigs. Unfortunately, F-Zero still doesn't work. Miscellaneous other small changes. Indented some code.

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@3426 8ced0084-cf51-0410-be5f-012b33b47a6e
This commit is contained in:
hrydgard 2009-06-13 14:10:10 +00:00
parent dd640a4937
commit 10442c8faa
17 changed files with 357 additions and 320 deletions

View File

@ -1030,6 +1030,8 @@ enum NormalSSEOps
}
void XEmitter::MOVQ_xmm(OpArg arg, X64Reg src) {
if (arg.IsSimpleReg())
PanicAlert("Emitter: MOVQ_xmm doesn't support single registers as destination");
if (src > 7)
{
// Alternate encoding

View File

@ -43,6 +43,7 @@ void SCoreStartupParameter::LoadDefaults()
bRunCompareServer = false;
bDSPThread = true;
bLockThreads = true;
bEnableFPRF = false;
bWii = false;
SelectedLanguage = 0;
iTLBHack = 0;

View File

@ -65,6 +65,7 @@ struct SCoreStartupParameter
bool bUseFastMem;
bool bLockThreads;
bool bOptimizeQuantizers;
bool bEnableFPRF;
bool bEnableCheats;
bool bEnableIsoCache;

View File

@ -68,19 +68,21 @@ static const SPatch OSPatches[] =
// wii only
{ "__OSInitAudioSystem", HLE_Misc::UnimplementedFunction },
// Super Monkey Ball
{ ".evil_vec_cosine", HLE_Misc::SMB_EvilVecCosine },
{ ".evil_normalize", HLE_Misc::SMB_EvilNormalize },
{ ".evil_vec_setlength", HLE_Misc::SMB_evil_vec_setlength },
{ ".evil_vec_something", HLE_Misc::FZero_evil_vec_normalize },
{ "PanicAlert", HLE_Misc::HLEPanicAlert },
{ ".sqrt_internal_needs_cr1", HLE_Misc::SMB_sqrt_internal },
{ ".rsqrt_internal_needs_cr1", HLE_Misc::SMB_rsqrt_internal },
{ ".atan2", HLE_Misc::SMB_atan2},
{ ".sqrt_fz", HLE_Misc::FZ_sqrt},
// Super Monkey Ball - no longer needed.
//{ ".evil_vec_cosine", HLE_Misc::SMB_EvilVecCosine },
//{ ".evil_normalize", HLE_Misc::SMB_EvilNormalize },
//{ ".evil_vec_setlength", HLE_Misc::SMB_evil_vec_setlength },
//{ ".evil_vec_something", HLE_Misc::FZero_evil_vec_normalize },
{ "PanicAlert", HLE_Misc::HLEPanicAlert },
//{ ".sqrt_internal_needs_cr1", HLE_Misc::SMB_sqrt_internal },
//{ ".rsqrt_internal_needs_cr1", HLE_Misc::SMB_rsqrt_internal },
//{ ".atan2", HLE_Misc::SMB_atan2},
//{ ".sqrt_fz", HLE_Misc::FZ_sqrt},
{ ".sqrt_internal_fz", HLE_Misc::FZ_sqrt_internal },
{ ".rsqrt_internal_fz", HLE_Misc::FZ_rsqrt_internal },
// F-zero still isn't working correctly, but these aren't really helping.
//{ ".sqrt_internal_fz", HLE_Misc::FZ_sqrt_internal },
//{ ".rsqrt_internal_fz", HLE_Misc::FZ_rsqrt_internal },
//{ ".kill_infinites", HLE_Misc::FZero_kill_infinites },
// special

View File

@ -110,7 +110,12 @@ void SMB_EvilNormalize()
float x = F(r3);
float y = F(r3 + 4);
float z = F(r3 + 8);
float inv_len = 1.0f / sqrtf(x*x + y*y + z*z);
float len = x*x + y*y + z*z;
float inv_len;
if (len <= 0)
inv_len = 0;
else
inv_len = 1.0f / sqrtf(len);
x *= inv_len;
y *= inv_len;
z *= inv_len;

View File

@ -100,6 +100,9 @@ public:
static void Init();
static void DoState(PointerWrap &p);
static u32 GetMask() { return m_InterruptMask; }
static u32 GetCause() { return m_InterruptCause; }
static void SetInterrupt(InterruptCause _causemask, bool _bSet=true);
// Read32

View File

@ -71,67 +71,25 @@ namespace Interpreter
void UpdateFPSCR(UReg_FPSCR fp);
void UpdateSSEState();
void UpdateFPRF(double value)
// start of unit test - Dolphin needs more of these!
/*
void TestFPRF()
{
u64 ivalue = *((u64*)&value);
// 5 bits (C, <, >, =, ?)
// top: class descriptor
FPSCR.FPRF = 4;
// easy cases first
if (ivalue == 0) {
// positive zero
FPSCR.FPRF = 0x2;
} else if (ivalue == 0x8000000000000000ULL) {
// negative zero
FPSCR.FPRF = 0x12;
} else if (ivalue == 0x7FF0000000000000ULL) {
// positive inf
FPSCR.FPRF = 0x5;
} else if (ivalue == 0xFFF0000000000000ULL) {
// negative inf
FPSCR.FPRF = 0x9;
} else {
// OK let's dissect this thing.
int sign = (int)(ivalue >> 63);
int exp = (int)((ivalue >> 52) & 0x7FF);
if (exp >= 1 && exp <= 2046) {
// Nice normalized number.
if (sign) {
FPSCR.FPRF = 0x8; // negative
} else {
FPSCR.FPRF = 0x4; // positive
}
return;
}
u64 mantissa = ivalue & 0x000FFFFFFFFFFFFFULL;
// int mantissa_top = (int)(mantissa >> 51);
if (exp == 0 && mantissa) {
// Denormalized number.
if (sign) {
FPSCR.FPRF = 0x18;
} else {
FPSCR.FPRF = 0x14;
}
} else if (exp == 0x7FF && mantissa /* && mantissa_top*/) {
FPSCR.FPRF = 0x11; // Quiet NAN
return;
}
}
}
UpdateFPRF(1.0);
if (FPSCR.FPRF != 0x4)
PanicAlert("Error 1");
UpdateFPRF(-1.0);
if (FPSCR.FPRF != 0x8)
PanicAlert("Error 2");
PanicAlert("Test done");
}*/
// extremely rare
void Helper_UpdateCR1(double _fValue)
{
FPSCR.FPRF = 0;
if (_fValue == 0.0 || _fValue == -0.0)
FPSCR.FPRF |= 2;
if (_fValue > 0.0)
FPSCR.FPRF |= 4;
if (_fValue < 0.0)
FPSCR.FPRF |= 8;
SetCRField(1, (FPSCR.Hex & 0x0000F000) >> 12);
// Should just update exception flags, not do any compares.
PanicAlert("CR1");
}
@ -218,7 +176,7 @@ void fcmpu(UGeckoInstruction _inst)
// Apply current rounding mode
void fctiwx(UGeckoInstruction _inst)
{
UpdateSSEState();
//UpdateSSEState();
const double b = rPS0(_inst.FB);
u32 value;
if (b > (double)0x7fffffff)
@ -257,7 +215,7 @@ largest representable int on PowerPC. */
// Always round toward zero
void fctiwzx(UGeckoInstruction _inst)
{
//UpdateFPSCR(FPSCR);
//UpdateSSEState();
const double b = rPS0(_inst.FB);
u32 value;
if (b > (double)0x7fffffff)
@ -279,7 +237,6 @@ void fctiwzx(UGeckoInstruction _inst)
// FPSCR.XX |= FPSCR.FI;
// FPSCR.FR = 1; //fabs(d_value) > fabs(b);
}
//FPRF undefined
riPS0(_inst.FD) = (u64)value;
if (_inst.Rc)
@ -305,7 +262,7 @@ void fnabsx(UGeckoInstruction _inst)
riPS0(_inst.FD) = riPS0(_inst.FB) | (1ULL << 63);
// This is a binary instruction. Does not alter FPSCR
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
}
void fnegx(UGeckoInstruction _inst)
{
@ -331,11 +288,12 @@ void frspx(UGeckoInstruction _inst) // round to single
if (true || FPSCR.RN != 0)
{
// Not used in Super Monkey Ball
UpdateSSEState();
// UpdateSSEState();
double b = rPS0(_inst.FB);
double rounded = (double)(float)b;
FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
FPSCR.FR = 1; // WHY? fabs(rounded) > fabs(b);
//FPSCR.FI = b != rounded; // changing both of these affect Super Monkey Ball behaviour greatly.
if (Core::g_CoreStartupParameter.bEnableFPRF)
UpdateFPRF(rounded);
rPS0(_inst.FD) = rPS1(_inst.FD) = rounded;
return;
// PanicAlert("frspx: FPSCR.RN=%i", FPSCR.RN);
@ -389,8 +347,8 @@ void frspx(UGeckoInstruction _inst) // round to single
//PanicAlert("NAN %08x %08x", in.i >> 32, in.i);
}
}
UpdateFPRF(out.d);
FPSCR.FR = 1; // SUPER MONKEY BALL HACK
rPS0(_inst.FD) = rPS1(_inst.FD) = out.d;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
@ -416,19 +374,19 @@ void fmulsx(UGeckoInstruction _inst)
void fmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
FPSCR.FI = 0;
FPSCR.FR = 0;
double result = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
rPS0(_inst.FD) = result;
UpdateFPRF(result);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fmaddsx(UGeckoInstruction _inst)
{
double d_value = (rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB);
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(d_value);
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(d_value);
FPSCR.FI = d_value != rPS0(_inst.FD);
FPSCR.FR = 0;
UpdateFPRF(rPS0(_inst.FD));
UpdateFPRF(rPS0(_inst.FD));
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -436,16 +394,11 @@ void fmaddsx(UGeckoInstruction _inst)
void faddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) + rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void faddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) + rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 1;
// FPSCR.Hex = (rand() ^ (rand() << 8) ^ (rand() << 16)) & ~(0x000000F8);
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -453,8 +406,6 @@ void faddsx(UGeckoInstruction _inst)
void fdivx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) / rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
@ -463,8 +414,6 @@ void fdivx(UGeckoInstruction _inst)
void fdivsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) / rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
@ -473,8 +422,6 @@ void fdivsx(UGeckoInstruction _inst)
void fresx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(1.0f / rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 1;
if (fabs(rPS0(_inst.FB)) == 0.0) {
FPSCR.ZX = 1;
}
@ -485,8 +432,6 @@ void fresx(UGeckoInstruction _inst)
void fmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = (rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -494,8 +439,6 @@ void fmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -503,16 +446,12 @@ void fmsubsx(UGeckoInstruction _inst)
void fnmaddx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmaddsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) + rPS0(_inst.FB)));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -520,16 +459,12 @@ void fnmaddsx(UGeckoInstruction _inst)
void fnmsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = -((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fnmsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) =
static_cast<float>(-((rPS0(_inst.FA) * rPS0(_inst.FC)) - rPS0(_inst.FB)));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -537,15 +472,11 @@ void fnmsubsx(UGeckoInstruction _inst)
void fsubx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS0(_inst.FA) - rPS0(_inst.FB);
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsubsx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = rPS1(_inst.FD) = static_cast<float>(rPS0(_inst.FA) - rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
@ -553,17 +484,12 @@ void fsubsx(UGeckoInstruction _inst)
void frsqrtex(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = 1.0f / (sqrt(rPS0(_inst.FB)));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}
void fsqrtx(UGeckoInstruction _inst)
{
rPS0(_inst.FD) = sqrt(rPS0(_inst.FB));
// FPSCR.FI = 0;
// FPSCR.FR = 0;
if (_inst.Rc) Helper_UpdateCR1(rPS0(_inst.FD));
}

View File

@ -130,7 +130,7 @@ void UpdateFPSCR(UReg_FPSCR fp)
void mcrfs(UGeckoInstruction _inst)
{
u32 fpflags = ((FPSCR.Hex >> (4*(_inst.CRFS))) & 0xF);
u32 fpflags = ((FPSCR.Hex >> (4*(7 - _inst.CRFS))) & 0xF);
switch (_inst.CRFS) {
case 0:
FPSCR.FX = 0;
@ -216,7 +216,7 @@ void mtfsfx(UGeckoInstruction _inst)
u32 m = 0;
for (int i = 0; i < 8; i++) { //7?? todo check
if (fm & (1 << i))
m |= (0xf << (i*4));
m |= (0xF << (i*4));
}
FPSCR.Hex = (FPSCR.Hex & ~m) | ((u32)(riPS0(_inst.FB)) & m);
@ -238,12 +238,15 @@ void mfcr(UGeckoInstruction _inst)
void mtcrf(UGeckoInstruction _inst)
{
u32 mask = 0;
u32 crm = _inst.CRM;
if (crm == 0xFF) {
if (crm == 0xFF)
{
SetCR(m_GPR[_inst.RS]);
} else {
}
else
{
//TODO: use lookup table? probably not worth it
u32 mask = 0;
for (int i = 0; i < 8; i++) {
if (crm & (1 << i))
mask |= 0xF << (i*4);
@ -470,10 +473,8 @@ void crxor(UGeckoInstruction _inst)
void mcrf(UGeckoInstruction _inst)
{
u32 cr = GetCR();
u32 crmask = ~(0xF0000000 >> (4*_inst.CRFD));
u32 flags = ((cr << (4*_inst.CRFS)) & 0xF0000000) >> (4*_inst.CRFD);
SetCR((cr & crmask) | flags);
int cr_f = GetCRField(_inst.CRFS);
SetCRField(_inst.CRFD, cr_f);
}
void isync(UGeckoInstruction _inst)

View File

@ -25,196 +25,209 @@
#include "Jit.h"
#include "JitRegCache.h"
const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
const u64 GC_ALIGNED16(psSignBits2[2]) = {0x8000000000000000ULL, 0x8000000000000000ULL};
const u64 GC_ALIGNED16(psAbsMask2[2]) = {0x7FFFFFFFFFFFFFFFULL, 0x7FFFFFFFFFFFFFFFULL};
const double GC_ALIGNED16(psOneOne2[2]) = {1.0, 1.0};
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
void Jit64::fp_tri_op(int d, int a, int b, bool reversible, bool dupe, void (XEmitter::*op)(Gen::X64Reg, Gen::OpArg))
{
fpr.Lock(d, a, b);
if (d == a)
{
fpr.Lock(d, a, b);
if (d == a)
{
fpr.LoadToX64(d, true);
(this->*op)(fpr.RX(d), fpr.R(b));
}
else if (d == b && reversible)
{
fpr.LoadToX64(d, true);
(this->*op)(fpr.RX(d), fpr.R(a));
}
else if (a != d && b != d)
{
// Sources different from d, can use rather quick solution
fpr.LoadToX64(d, !dupe);
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b));
}
else if (b != d)
{
fpr.LoadToX64(d, !dupe);
MOVSD(XMM0, fpr.R(b));
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
else // Other combo, must use two temps :(
{
MOVSD(XMM0, fpr.R(a));
MOVSD(XMM1, fpr.R(b));
fpr.LoadToX64(d, !dupe);
(this->*op)(XMM0, Gen::R(XMM1));
MOVSD(fpr.RX(d), Gen::R(XMM0));
}
if (dupe) {
ForceSinglePrecisionS(fpr.RX(d));
MOVDDUP(fpr.RX(d), fpr.R(d));
}
fpr.UnlockAll();
fpr.LoadToX64(d, true);
(this->*op)(fpr.RX(d), fpr.R(b));
}
void Jit64::fp_arith_s(UGeckoInstruction inst)
else if (d == b && reversible)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
bool dupe = inst.OPCD == 59;
switch (inst.SUBOP5)
{
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
case 23: //sel
Default(inst);
break;
case 24: //res
Default(inst);
break;
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
}
fpr.LoadToX64(d, true);
(this->*op)(fpr.RX(d), fpr.R(a));
}
void Jit64::fmaddXX(UGeckoInstruction inst)
else if (a != d && b != d)
{
// Sources different from d, can use rather quick solution
fpr.LoadToX64(d, !dupe);
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), fpr.R(b));
}
else if (b != d)
{
fpr.LoadToX64(d, !dupe);
MOVSD(XMM0, fpr.R(b));
MOVSD(fpr.RX(d), fpr.R(a));
(this->*op)(fpr.RX(d), Gen::R(XMM0));
}
else // Other combo, must use two temps :(
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
bool single_precision = inst.OPCD == 59;
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
int d = inst.FD;
fpr.Lock(a, b, c, d);
MOVSD(XMM0, fpr.R(a));
switch (inst.SUBOP5)
{
case 28: //msub
MULSD(XMM0, fpr.R(c));
SUBSD(XMM0, fpr.R(b));
break;
case 29: //madd
MULSD(XMM0, fpr.R(c));
ADDSD(XMM0, fpr.R(b));
break;
case 30: //nmsub
MULSD(XMM0, fpr.R(c));
SUBSD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits2));
break;
case 31: //nmadd
MULSD(XMM0, fpr.R(c));
ADDSD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits2));
break;
}
fpr.LoadToX64(d, false);
//YES it is necessary to dupe the result :(
//TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
if (single_precision) {
ForceSinglePrecisionS(XMM0);
MOVDDUP(fpr.RX(d), R(XMM0));
} else {
MOVSD(fpr.RX(d), R(XMM0));
}
fpr.UnlockAll();
MOVSD(XMM1, fpr.R(b));
fpr.LoadToX64(d, !dupe);
(this->*op)(XMM0, Gen::R(XMM1));
MOVSD(fpr.RX(d), Gen::R(XMM0));
}
void Jit64::fmrx(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
fpr.LoadToX64(d, true); // we don't want to destroy the high bit
MOVSD(fpr.RX(d), fpr.R(b));
if (dupe) {
ForceSinglePrecisionS(fpr.RX(d));
MOVDDUP(fpr.RX(d), fpr.R(d));
}
fpr.UnlockAll();
}
void Jit64::fp_arith_s(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
void Jit64::fcmpx(UGeckoInstruction inst)
// Only the interpreter has "proper" support for (some) FP flags
if (inst.SUBOP5 == 25 && Core::g_CoreStartupParameter.bEnableFPRF) {
Default(inst); return;
}
bool dupe = inst.OPCD == 59;
switch (inst.SUBOP5)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (jo.fpAccurateFlags)
{
Default(inst);
return;
}
bool ordered = inst.SUBOP10 == 32;
/*
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
u32 compareResult;
case 18: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::DIVSD); break; //div
case 20: fp_tri_op(inst.FD, inst.FA, inst.FB, false, dupe, &XEmitter::SUBSD); break; //sub
case 21: fp_tri_op(inst.FD, inst.FA, inst.FB, true, dupe, &XEmitter::ADDSD); break; //add
case 23: //sel
Default(inst);
break;
case 24: //res
Default(inst);
break;
case 25: fp_tri_op(inst.FD, inst.FA, inst.FC, true, dupe, &XEmitter::MULSD); break; //mul
default:
_assert_msg_(DYNA_REC, 0, "fp_arith_s WTF!!!");
}
}
if(IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if(fa < fb) compareResult = 8;
else if(fa > fb) compareResult = 4;
else compareResult = 2;
void Jit64::fmaddXX(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
// Only the interpreter has "proper" support for (some) FP flags
if (inst.SUBOP5 == 29 && Core::g_CoreStartupParameter.bEnableFPRF) {
Default(inst); return;
}
FPSCR.FPRF = compareResult;
CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4));
bool single_precision = inst.OPCD == 59;
int a = inst.FA;
int b = inst.FB;
int c = inst.FC;
int d = inst.FD;
fpr.Lock(a, b, c, d);
MOVSD(XMM0, fpr.R(a));
switch (inst.SUBOP5)
{
case 28: //msub
MULSD(XMM0, fpr.R(c));
SUBSD(XMM0, fpr.R(b));
break;
case 29: //madd
MULSD(XMM0, fpr.R(c));
ADDSD(XMM0, fpr.R(b));
break;
case 30: //nmsub
MULSD(XMM0, fpr.R(c));
SUBSD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits2));
break;
case 31: //nmadd
MULSD(XMM0, fpr.R(c));
ADDSD(XMM0, fpr.R(b));
XORPD(XMM0, M((void*)&psSignBits2));
break;
}
fpr.LoadToX64(d, false);
//YES it is necessary to dupe the result :(
//TODO : analysis - does the top reg get used? If so, dupe, if not, don't.
if (single_precision) {
ForceSinglePrecisionS(XMM0);
MOVDDUP(fpr.RX(d), R(XMM0));
} else {
MOVSD(fpr.RX(d), R(XMM0));
}
// SMB checks flags after this op. Let's lie.
//AND(32, M(&PowerPC::ppcState.fpscr), Imm32(~((0x80000000 >> 19) | (0x80000000 >> 15))));
//OR(32, M(&PowerPC::ppcState.fpscr), Imm32((0x80000000 >> 16)));
fpr.UnlockAll();
}
void Jit64::fmrx(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (inst.Rc) {
Default(inst); return;
}
int d = inst.FD;
int b = inst.FB;
fpr.LoadToX64(d, true); // we don't want to destroy the high bit
MOVSD(fpr.RX(d), fpr.R(b));
}
void Jit64::fcmpx(UGeckoInstruction inst)
{
if(Core::g_CoreStartupParameter.bJITOff || Core::g_CoreStartupParameter.bJITFloatingPointOff)
{Default(inst); return;} // turn off from debugger
INSTRUCTION_START;
if (jo.fpAccurateFlags)
{
Default(inst);
return;
}
bool ordered = inst.SUBOP10 == 32;
/*
double fa = rPS0(_inst.FA);
double fb = rPS0(_inst.FB);
u32 compareResult;
if(IsNAN(fa) || IsNAN(fb)) compareResult = 1;
else if(fa < fb) compareResult = 8;
else if(fa > fb) compareResult = 4;
else compareResult = 2;
FPSCR.FPRF = compareResult;
CR = (CR & (~(0xf0000000 >> (_inst.CRFD * 4)))) | (compareResult << ((7 - _inst.CRFD) * 4));
*/
int a = inst.FA;
int b = inst.FB;
int crf = inst.CRFD;
int shift = crf * 4;
//FPSCR
//XOR(32,R(EAX),R(EAX));
int a = inst.FA;
int b = inst.FB;
int crf = inst.CRFD;
int shift = crf * 4;
//FPSCR
//XOR(32,R(EAX),R(EAX));
fpr.Lock(a,b);
if (a != b)
fpr.LoadToX64(a, true);
fpr.Lock(a,b);
if (a != b)
fpr.LoadToX64(a, true);
// USES_CR
if (ordered)
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
else
UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
FixupBranch pLesser = J_CC(CC_B);
FixupBranch pGreater = J_CC(CC_A);
// _x86Reg == 0
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
FixupBranch continue1 = J();
// _x86Reg > 0
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
FixupBranch continue2 = J();
// _x86Reg < 0
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
fpr.UnlockAll();
}
// USES_CR
if (ordered)
COMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
else
UCOMISD(fpr.R(a).GetSimpleReg(), fpr.R(b));
FixupBranch pLesser = J_CC(CC_B);
FixupBranch pGreater = J_CC(CC_A);
// _x86Reg == 0
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x2));
FixupBranch continue1 = J();
// _x86Reg > 0
SetJumpTarget(pGreater);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x4));
FixupBranch continue2 = J();
// _x86Reg < 0
SetJumpTarget(pLesser);
MOV(8, M(&PowerPC::ppcState.cr_fast[crf]), Imm8(0x8));
SetJumpTarget(continue1);
SetJumpTarget(continue2);
fpr.UnlockAll();
}

View File

@ -1878,7 +1878,7 @@ static void DoWriteCode(IRBuilder* ibuild, Jit64* Jit, bool UseProfile, bool Mak
case StorePaired: {
regSpill(RI, EAX);
regSpill(RI, EDX);
unsigned quantreg = *I >> 24;
u32 quantreg = *I >> 24;
Jit->MOVZX(32, 16, EAX, M(&PowerPC::ppcState.spr[SPR_GQR0 + quantreg]));
Jit->MOVZX(32, 8, EDX, R(AL));
// FIXME: Fix ModR/M encoding to allow [EDX*4+disp32]!

View File

@ -39,8 +39,6 @@
//#define INSTRUCTION_START Default(inst); return;
#define INSTRUCTION_START
// The big problem is likely instructions that set the quantizers in the same block.
// We will have to break block after quantizers are written to.
void Jit64::psq_st(UGeckoInstruction inst)
{
INSTRUCTION_START

View File

@ -687,6 +687,7 @@ void InitTables()
}
#define OPLOG
#define OP_TO_LOG "mcrfs"
#ifdef OPLOG
namespace {
@ -700,7 +701,7 @@ void CompileInstruction(UGeckoInstruction _inst)
GekkoOPInfo *info = GetOpInfo(_inst);
if (info) {
#ifdef OPLOG
if (!strcmp(info->opname, "mffsx")) { ///"mcrfs"
if (!strcmp(info->opname, OP_TO_LOG)){ ///"mcrfs"
rsplocations.push_back(jit.js.compilerPC);
}
#endif
@ -764,9 +765,9 @@ void LogCompiledInstructions()
}
fclose(f);
#ifdef OPLOG
f = fopen(StringFromFormat(FULL_LOGS_DIR "mcrfs_at.txt", time).c_str(), "w");
f = fopen(StringFromFormat(FULL_LOGS_DIR OP_TO_LOG "_at.txt", time).c_str(), "w");
for (size_t i = 0; i < rsplocations.size(); i++) {
fprintf(f, "mffsx: %08x\n", rsplocations[i]);
fprintf(f, OP_TO_LOG ": %08x\n", rsplocations[i]);
}
fclose(f);
#endif

View File

@ -43,20 +43,22 @@ static CoreMode mode;
void CompactCR()
{
ppcState.cr = 0;
for (int i = 0; i < 8; i++) {
ppcState.cr |= ppcState.cr_fast[i] << (28 - i * 4);
u32 new_cr = ppcState.cr_fast[0] << 28;
for (int i = 1; i < 8; i++)
{
new_cr |= ppcState.cr_fast[i] << (28 - i * 4);
}
ppcState.cr = new_cr;
}
void ExpandCR()
{
for (int i = 0; i < 8; i++) {
for (int i = 0; i < 8; i++)
{
ppcState.cr_fast[i] = (ppcState.cr >> (28 - i * 4)) & 0xF;
}
}
void DoState(PointerWrap &p)
{
p.Do(ppcState);
@ -304,7 +306,7 @@ void CheckExceptions()
ppcState.Exceptions &= ~EXCEPTION_ALIGNMENT;
}
// EXTERNAL INTTERUPT
// EXTERNAL INTERRUPT
else if (MSR & 0x0008000) //hacky...the exception shouldn't be generated if EE isn't set...
{
if (ppcState.Exceptions & EXCEPTION_EXTERNAL_INT)
@ -353,4 +355,78 @@ void OnIdleIL()
CoreTiming::Idle();
}
} // namespace
// Classifies a double and returns the 5-bit PowerPC FPRF code for it.
// Bit layout of the result: (C, <, >, =, ?) - the top bit is the class
// descriptor, the low four bits are the condition code.
//
// Returned codes:
//   0x11  NaN
//   0x09  -infinity          0x05  +infinity
//   0x08  -normalized        0x04  +normalized
//   0x18  -denormalized      0x14  +denormalized
//   0x12  -zero              0x02  +zero
//
// The function only classifies and returns the code; it does not write
// FPSCR itself. Callers store the result where they need it.
int PPCFPClass(double dvalue)
{
#ifdef _WIN32
	switch (_fpclass(dvalue))
	{
	case _FPCLASS_SNAN:
	case _FPCLASS_QNAN: return 0x11;
	case _FPCLASS_NINF: return 0x9;
	case _FPCLASS_NN:   return 0x8;
	case _FPCLASS_ND:   return 0x18;
	case _FPCLASS_NZ:   return 0x12;
	case _FPCLASS_PZ:   return 0x2;
	case _FPCLASS_PD:   return 0x14;
	case _FPCLASS_PN:   return 0x4;
	case _FPCLASS_PINF: return 0x5;
	default:            return 0x4;
	}
#else
	// TODO: Make sure the below is equivalent to the above - then switch win32 implementation to it.
	// Reinterpret the double as its raw 64-bit pattern (union punning, as before).
	union {
		double d;
		unsigned long long i;
	} value;
	value.d = dvalue;

	// Test the sign bit on the full 64-bit value. Casting the masked value to
	// int first would truncate bit 63 away and always report "positive".
	const bool negative = (value.i & 0x8000000000000000ULL) != 0;
	const int exp = (int)((value.i >> 52) & 0x7FF);
	const unsigned long long mantissa = value.i & 0x000FFFFFFFFFFFFFULL;

	if (exp == 0)
	{
		if (mantissa == 0)
			return negative ? 0x12 : 0x2;   // signed zero
		return negative ? 0x18 : 0x14;      // denormalized number
	}

	if (exp == 0x7FF)
	{
		if (mantissa == 0)
			return negative ? 0x9 : 0x5;    // infinity
		return 0x11;                        // NaN (quiet and signalling alike)
	}

	// 1 <= exp <= 2046: nice normalized number.
	return negative ? 0x8 : 0x4;
#endif
}
} // namespace
// Classifies dvalue via PowerPC::PPCFPClass and stores the resulting
// class code in the FPRF field of FPSCR.
void UpdateFPRF(double dvalue)
{
	const int fp_class = PowerPC::PPCFPClass(dvalue);
	FPSCR.FPRF = fp_class;
}

View File

@ -127,9 +127,8 @@ void OnIdleIL();
} // namespace
// Wrappers to make it easier to in the future completely replace the storage of CR and Carry bits
// to something more x86-friendly. These are not used 100% consistently yet - and if we do this, we
// need the corresponding stuff on the JIT side too.
// Fast CR system - store them in single bytes instead of nibbles to not have to
// mask/shift them out.
// These are intended to stay fast, probably become faster, and are not likely to slow down much if at all.
inline void SetCRField(int cr_field, int value) {
@ -187,4 +186,6 @@ inline void SetXER_SO(int value) {
((UReg_XER&)PowerPC::ppcState.spr[SPR_XER]).SO = value;
}
void UpdateFPRF(double dvalue);
#endif

View File

@ -18,6 +18,7 @@
#include "Debugger.h"
#include "RegisterView.h"
#include "PowerPC/PowerPC.h"
#include "HW/PeripheralInterface.h"
// F-zero 80005e60 wtf??
@ -25,7 +26,7 @@ extern const char* GetGPRName(unsigned int index);
extern const char* GetFPRName(unsigned int index);
static const char *special_reg_names[] = {
"PC", "LR", "CTR", "CR", "FPSCR", "SRR0", "SRR1",
"PC", "LR", "CTR", "CR", "FPSCR", "SRR0", "SRR1", "Exceptions", "Int Mask", "Int Cause",
};
static u32 GetSpecialRegValue(int reg) {
@ -37,7 +38,10 @@ static u32 GetSpecialRegValue(int reg) {
case 4: return PowerPC::ppcState.fpscr;
case 5: return PowerPC::ppcState.spr[SPR_SRR0];
case 6: return PowerPC::ppcState.spr[SPR_SRR1];
default: return 0;
case 7: return PowerPC::ppcState.Exceptions;
case 8: return CPeripheralInterface::GetMask();
case 9: return CPeripheralInterface::GetCause();
default: return 0;
}
}

View File

@ -29,14 +29,16 @@
// PC (specials)
// LR
// CTR
// CR0
// CR0-7
// FPSCR
// SRR0
// SRR1
// Exceptions
class CRegTable : public wxGridTableBase
{
enum {
NUM_SPECIALS = 7,
NUM_SPECIALS = 10,
};
public:

View File

@ -131,6 +131,7 @@ bool BootCore(const std::string& _rFilename)
ini->Get("Core", "UseDualCore", &StartUp.bUseDualCore, StartUp.bUseDualCore);
ini->Get("Core", "SkipIdle", &StartUp.bSkipIdle, StartUp.bSkipIdle);
ini->Get("Core", "OptimizeQuantizers", &StartUp.bOptimizeQuantizers, StartUp.bOptimizeQuantizers);
ini->Get("Core", "EnableFPRF", &StartUp.bEnableFPRF, StartUp.bEnableFPRF);
ini->Get("Core", "TLBHack", &StartUp.iTLBHack, StartUp.iTLBHack);
// ------------------------------------------------