DSPLLE - flags&stuff,xar->subarn,0x80 kinda figured out,... (experimental)

git-svn-id: https://dolphin-emu.googlecode.com/svn/trunk@5174 8ced0084-cf51-0410-be5f-012b33b47a6e
2024-10-08 08:33:28 +00:00 · 2010-03-08 21:25:35 +00:00 · 2010-03-08 21:25:35 +00:00 · 70a712c065
commit 70a712c065
parent 76ad8db445
14 changed files with 1351 additions and 1072 deletions
--- a/Source/Core/DSPCore/Src/DSPAccelerator.cpp
+++ b/Source/Core/DSPCore/Src/DSPAccelerator.cpp
@ -72,11 +72,11 @@ u16 dsp_read_aram_d3()
 	u32 Address = (g_dsp.ifx_regs[DSP_ACCAH] << 16) | g_dsp.ifx_regs[DSP_ACCAL];
 	u16 val = 0;
 	switch (g_dsp.ifx_regs[DSP_FORMAT]) {
-		case 0x5:   // unsigned 8-bit reads .. I think.
+		case 0x5:   // u8 reads
 			val = DSPHost_ReadHostMemory(Address);
 			Address++;
 			break;
-		case 0x6:   // unsigned 16-bit reads .. I think.
+		case 0x6:   // u16 reads
 		    val = (DSPHost_ReadHostMemory(Address) << 8) | DSPHost_ReadHostMemory(Address + 1);
 			Address += 2;
 			break;
@ -132,24 +132,22 @@ u16 dsp_read_accelerator()
 	    case 0x00:  // ADPCM audio
 		    val = ADPCM_Step(Address);
 		    break;
-
 	    case 0x0A:  // 16-bit PCM audio
 		    val = (DSPHost_ReadHostMemory(Address) << 8) | DSPHost_ReadHostMemory(Address + 1);
-
 		    g_dsp.ifx_regs[DSP_YN2] = g_dsp.ifx_regs[DSP_YN1];
 		    g_dsp.ifx_regs[DSP_YN1] = val;
-
 		    Address += 2;
 		    break;
-
 	    default:
-		    val = (DSPHost_ReadHostMemory(Address) << 8) | DSPHost_ReadHostMemory(Address + 1);
-		    Address += 2;
-		    ERROR_LOG(DSPLLE, "Unknown DSP Format %i", g_dsp.ifx_regs[DSP_FORMAT]);
+		    ERROR_LOG(DSPLLE, "Unknown DSP Format %x", g_dsp.ifx_regs[DSP_FORMAT]);
 		    break;
 	}

 	// TODO: Take GAIN into account, whatever it is.
+	if (g_dsp.ifx_regs[DSP_GAIN] > 0)
+	{
+		//NOTICE_LOG(DSPLLE,"format: 0x%04x - val: 0x%04x - gain: 0x%04x", g_dsp.ifx_regs[DSP_FORMAT], val, g_dsp.ifx_regs[DSP_GAIN]);
+	}

 	// Check for loop.
 	if (Address >= EndAddress)
--- a/Source/Core/DSPCore/Src/DSPCore.h
+++ b/Source/Core/DSPCore/Src/DSPCore.h
@ -150,19 +150,22 @@


 // SR bits
-#define SR_CARRY		0x0001
-#define SR_OVERFLOW		0x0002
-#define SR_ARITH_ZERO	0x0004
-#define SR_SIGN			0x0008
-#define SR_OVER_S32		0x0010 // set when there there was mod/tst/cmp on accu and result is over s32
-#define SR_TOP2BITS		0x0020 // if the upper (ac?.m/ax?.h) 2 bits are equal
-#define SR_LOGIC_ZERO	0x0040
-#define SR_80			0x0080 // Unknown, set by add, sub
-#define SR_INT_ENABLE	0x0200 // Not 100% sure but duddie says so. This should replace the hack, if so.
+#define SR_CARRY			0x0001
+#define SR_OVERFLOW			0x0002
+#define SR_ARITH_ZERO		0x0004
+#define SR_SIGN				0x0008
+#define SR_OVER_S32			0x0010 // set when there was mod/tst/cmp on accu and result is over s32
+#define SR_TOP2BITS			0x0020 // if the upper (ac?.m/ax?.h) 2 bits are equal
+#define SR_LOGIC_ZERO		0x0040
+#define SR_OVERFLOW_SPECIAL	0x0080 // set at the same time as 0x2 (under same conditions) - but not cleared the same
+#define SR_100				0x0100 // unknown
+#define SR_INT_ENABLE		0x0200 // Not 100% sure but duddie says so. This should replace the hack, if so.
+#define SR_400				0x0400 // unknown
 #define SR_EXT_INT_ENABLE	0x0800 // Appears in zelda - seems to disable external interupts
-#define SR_MUL_MODIFY	0x2000 // 1 = normal. 0 = x2   (M0, M2)
-#define SR_40_MODE_BIT	0x4000 // 0 = "16", 1 = "40"  (SET16, SET40)  Controls sign extension when loading mid accums.
-#define SR_MUL_UNSIGNED	0x8000 // 0 = normal. 1 = unsigned  (CLR15, SET15) If set, treats operands as unsigned. Tested with mulx only so far.
+#define SR_1000				0x1000 // unknown
+#define SR_MUL_MODIFY		0x2000 // 1 = normal. 0 = x2   (M0, M2)
+#define SR_40_MODE_BIT		0x4000 // 0 = "16", 1 = "40"  (SET16, SET40)  Controls sign extension when loading mid accums.
+#define SR_MUL_UNSIGNED		0x8000 // 0 = normal. 1 = unsigned  (CLR15, SET15) If set, treats operands as unsigned. Tested with mulx only so far.

 // This should be the bits affected by CMP. Does not include logic zero.
 #define SR_CMP_MASK		0x3f
--- a/Source/Core/DSPCore/Src/DSPHWInterface.cpp
+++ b/Source/Core/DSPCore/Src/DSPHWInterface.cpp
@ -133,7 +133,7 @@ void gdsp_ifx_write(u16 addr, u16 val)
 		    if (val & 0x1)
 			    DSPHost_InterruptRequest();
 			else 
-				ERROR_LOG(DSPLLE, "Unknown Interrupt Request pc=%04x (%04x)", g_dsp.pc, val);
+				INFO_LOG(DSPLLE, "Unknown Interrupt Request pc=%04x (%04x)", g_dsp.pc, val);
 		    break;

 	    case 0xfc: // DMBH
@ -156,14 +156,14 @@ void gdsp_ifx_write(u16 addr, u16 val)
 		    g_dsp.ifx_regs[DSP_DSCR] &= ~0x0004;
 		    break;

-		case 0xd3:   // ZeldaUnk (accelerator WRITE)
-			//		   	NOTICE_LOG(DSPLLE, "Write To ZeldaUnk pc=%04x (%04x)", g_dsp.pc, val);
+		case 0xd3: // Accelerator write (Zelda type) - "UnkZelda"
 			dsp_write_aram_d3(val);
 			break;

-		case 0xde:
-			//if (val)
-			//	PanicAlert("Gain written: %04x", val);   // BMX XXX does, and sounds HORRIBLE.
+		case 0xde: // BMX XXX does, and sounds HORRIBLE. / Spyro - A Hero's Tail / Sega GC games / Wiiware - World of Goo
+			if (val) {
+				INFO_LOG(DSPLLE,"Gain Written: 0x%04x", val); 
+			}
 	    case 0xcd:
 	    case 0xce:
 	    case 0xcf:
@ -210,8 +210,7 @@ u16 gdsp_ifx_read(u16 addr)
 	    case 0xdd:  // ADPCM Accelerator reads
 		    return dsp_read_accelerator();

-	    case 0xd3:
-			//NOTICE_LOG(DSPLLE, "Read from ZeldaUnk pc=%04x", g_dsp.pc);
+	    case 0xd3: // Accelerator reads (Zelda type) - "UnkZelda"
 		    return dsp_read_aram_d3();

 	    default:
--- a/Source/Core/DSPCore/Src/DSPIntCCUtil.cpp
+++ b/Source/Core/DSPCore/Src/DSPIntCCUtil.cpp
@ -28,7 +28,6 @@ namespace DSPInterpreter {

 void Update_SR_Register64(s64 _Value, bool carry, bool overflow)
 {
-	// TODO: recheck 0x1,0x2,even 0x80... implement...
 	g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK;

 	// 0x01
@ -37,10 +36,11 @@ void Update_SR_Register64(s64 _Value, bool carry, bool overflow)
 		g_dsp.r[DSP_REG_SR] |= SR_CARRY;
 	}

-	// 0x02
+	// 0x02 and 0x80
 	if (overflow)
 	{
 		g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW;
+		g_dsp.r[DSP_REG_SR]  |= SR_OVERFLOW_SPECIAL; 
 	}

 	// 0x04
@ -66,16 +66,11 @@ void Update_SR_Register64(s64 _Value, bool carry, bool overflow)
 	{
 		g_dsp.r[DSP_REG_SR] |= SR_TOP2BITS;
 	}
-
-	// 0x80
-	{
-	}
 }


 void Update_SR_Register16(s16 _Value, bool carry, bool overflow, bool overS32)
 {
-	// TODO: recheck 0x1,0x2,even 0x80... implement...
 	g_dsp.r[DSP_REG_SR] &= ~SR_CMP_MASK;

 	// 0x01
@ -84,12 +79,13 @@ void Update_SR_Register16(s16 _Value, bool carry, bool overflow, bool overS32)
 		g_dsp.r[DSP_REG_SR] |= SR_CARRY;
 	}

-	// 0x02
+	// 0x02 and 0x80
 	if (overflow)
 	{
 		g_dsp.r[DSP_REG_SR] |= SR_OVERFLOW;
+		g_dsp.r[DSP_REG_SR]  |= SR_OVERFLOW_SPECIAL; 
 	}
-	
+
 	// 0x04
 	if (_Value == 0)
 	{
@ -113,10 +109,6 @@ void Update_SR_Register16(s16 _Value, bool carry, bool overflow, bool overS32)
 	{
 		g_dsp.r[DSP_REG_SR] |= SR_TOP2BITS;
 	}
-
-	// 0x80
-	{
-	}
 }

 void Update_SR_LZ(bool value) {
--- a/Source/Core/DSPCore/Src/DSPIntCCUtil.h
+++ b/Source/Core/DSPCore/Src/DSPIntCCUtil.h
@ -34,12 +34,12 @@ void Update_SR_Register16(s16 _Value, bool carry = false, bool overflow = false,
 void Update_SR_Register64(s64 _Value, bool carry = false, bool overflow = false);
 void Update_SR_LZ(bool value);

-inline bool isAddCarry(u64 val, u64 result) {
-	return (val > result);
+inline bool isCarry(u64 val, u64 result) {
+	return (val>result);
 }

-inline bool isSubCarry(u64 val, u64 result) {
-	return (val < result);
+inline bool isCarry2(u64 val, u64 result) {
+	return (val>=result); 
 }

 inline bool isOverflow(s64 val1, s64 val2, s64 res) {
--- a/Source/Core/DSPCore/Src/DSPIntExtOps.cpp
+++ b/Source/Core/DSPCore/Src/DSPIntExtOps.cpp
@ -480,7 +480,27 @@ void zeroWriteBackLog()
 {
 	// always make sure to have an extra entry at the end w/ -1 to avoid
 	// infinitive loops
-	for (int i = 0; writeBackLogIdx[i] != -1; i++) 
+	for (int i = 0; writeBackLogIdx[i] != -1; i++) {
 		dsp_op_write_reg(writeBackLogIdx[i], 0);
+	}
 }

+//needed for 0x3... (at least)..., + clrl
+//ex. corner case -> 0x4060: main opcode modifies .m, and extended .l -> .l shouldn't be zeroed because of .m write...
+void zeroWriteBackLogPreserveAcc(u8 acc) 
+{
+	for (int i = 0; writeBackLogIdx[i] != -1; i++) {
+		
+		// acc0
+		if ((acc == 0) &&  
+			((writeBackLogIdx[i] == DSP_REG_ACL0) || (writeBackLogIdx[i] == DSP_REG_ACM0) || (writeBackLogIdx[i] == DSP_REG_ACH0)))
+			continue;
+		
+		// acc1
+		if ((acc == 1) && 
+			((writeBackLogIdx[i] == DSP_REG_ACL1) || (writeBackLogIdx[i] == DSP_REG_ACM1) || (writeBackLogIdx[i] == DSP_REG_ACH1)))
+			continue;
+
+		dsp_op_write_reg(writeBackLogIdx[i], 0);
+	}
+}
--- a/Source/Core/DSPCore/Src/DSPIntUtil.h
+++ b/Source/Core/DSPCore/Src/DSPIntUtil.h
@ -37,6 +37,7 @@
 // ---------------------------------------------------------------------------------------
 // --- SR
 // ---------------------------------------------------------------------------------------
+
 inline void dsp_SR_set_flag(int flag)
 {
 	g_dsp.r[DSP_REG_SR] |= flag;
@ -47,9 +48,13 @@ inline bool dsp_SR_is_flag_set(int flag)
 	return (g_dsp.r[DSP_REG_SR] & flag) != 0;
 }

-
+// ---------------------------------------------------------------------------------------
+// --- AR increments, decrements
+// ---------------------------------------------------------------------------------------
+//
 // HORRIBLE UGLINESS, someone please fix.
 // See http://code.google.com/p/dolphin-emu/source/detail?r=3125
+
 inline u16 ToMask(u16 a)
 {
 	a = a | (a >> 8);
@ -112,6 +117,24 @@ inline s16 dsp_increase_addr_reg(int reg, s16 value)
 	return tmp;
 }

+inline s16 dsp_decrease_addr_reg(int reg, s16 value)
+{
+	s16 tmp = - 1;
+
+	// TODO: DO RIGHT!
+	if (value > 0) {
+		for (int i = 0; i < value; i++) {
+			tmp = dsp_decrement_addr_reg(reg, tmp);
+		}
+	} else if (value < 0) {
+		for (int i = 0; i < (int)(-value); i++) {
+			tmp = dsp_increment_addr_reg(reg, tmp);
+		}
+	} else
+		tmp = g_dsp.r[reg];
+
+	return tmp;
+}

 // ---------------------------------------------------------------------------------------
 // --- reg
@ -143,18 +166,8 @@ inline void dsp_op_write_reg(int reg, u16 val)
 	case DSP_REG_ACM0:
 	case DSP_REG_ACM1:
 		g_dsp.r[reg] = val;
-		// Enabling the below sign extension code breaks things.
-		// There's probably some condition that enable it, maybe one of 
-		// the status flags like M2.
-		// Or maybe it only happens when this call is a result of 'l and similar extended opcodes.
-		
-		// Sign extend the loaded register. ACM0
-		// g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACH0] = ((s16)val < 0 ? 0xFFFF : 0);
-		// g_dsp.r[reg - DSP_REG_ACM0 + DSP_REG_ACL0] = 0;  // ?
 		break;

-		// There might also be something similar for AX.L but I'm not at all sure about that.
-
 	// Stack registers.
 	case DSP_REG_ST0:
 	case DSP_REG_ST1:
@ -262,21 +275,18 @@ inline s16 dsp_get_acc_l(int _reg)
 	return g_dsp.r[DSP_REG_ACL0 + _reg];
 }

-
 inline s16 dsp_get_acc_m(int _reg)
 {
 	_assert_(_reg < 2);
 	return g_dsp.r[DSP_REG_ACM0 + _reg];
 }

-
 inline s16 dsp_get_acc_h(int _reg)
 {
 	_assert_(_reg < 2);
 	return g_dsp.r[DSP_REG_ACH0 + _reg];
 }

-
 // ---------------------------------------------------------------------------------------
 // --- AX - extra accumulators (32-bit)
 // ---------------------------------------------------------------------------------------
--- a/Source/Core/DSPCore/Src/DSPInterpreter.cpp
+++ b/Source/Core/DSPCore/Src/DSPInterpreter.cpp
@ -73,6 +73,7 @@ u16 ReadCR()

 void Step()
 {
+	DSPCore_CheckExternalInterrupt();
 	DSPCore_CheckExceptions();

 	g_dsp.step_counter++;
@ -91,12 +92,7 @@ void Step()
 		ProfilerDump(g_dsp.step_counter);
 	}
 #endif
-/*
-	//Pikmin GC (US)
-	if (g_dsp.pc == 0x0506)
-		NOTICE_LOG(DSPLLE,"-> FORMAT JUMPTABLE --> pc:=%04x,ac0.m:=%04x,ac1.m:=%04x", g_dsp.pc, dsp_get_acc_m(0), dsp_get_acc_m(1));
-*/
-	
+
 	u16 opc = dsp_fetch_code();
 	ExecuteInstruction(UDSPInstruction(opc));
 	HandleLoop();
@ -141,7 +137,7 @@ int RunCyclesDebug(int cycles)
 			return 0;
 	}

-	DSPCore_CheckExternalInterrupt();
+	//DSPCore_CheckExternalInterrupt();
 	
 	// Now, let's run a few cycles with idle skipping.
 	for (int i = 0; i < 8; i++)
@ -181,23 +177,7 @@ int RunCyclesDebug(int cycles)
 // Used by non-thread mode. Meant to be efficient.
 int RunCycles(int cycles)
 {
-	DSPCore_CheckExternalInterrupt();
-
-	if (cycles < 18)
-	{
-		for (int i = 0; i < cycles; i++)
-		{
-			if (g_dsp.cr & CR_HALT)
-				return 0;
-			if (DSPAnalyzer::code_flags[g_dsp.pc] & DSPAnalyzer::CODE_IDLE_SKIP)
-				return 0;
-			Step();
-			cycles--;
-		}
-		return cycles;
-	}
-
-	DSPCore_CheckExternalInterrupt();
+	//DSPCore_CheckExternalInterrupt();

 	// First, let's run a few cycles with no idle skipping so that things can progress a bit.
 	for (int i = 0; i < 8; i++)
--- a/Source/Core/DSPCore/Src/DSPInterpreter.h
+++ b/Source/Core/DSPCore/Src/DSPInterpreter.h
@ -20,6 +20,8 @@

 #include "DSPTables.h"

+#define PRECISE_SR_FLAGS
+
 #define DSP_REG_MASK    0x1f

 namespace DSPInterpreter {
@ -95,7 +97,6 @@ void lsrnrx(const UDSPInstruction& opc);
 void asrnrx(const UDSPInstruction& opc);
 void lsrnr(const UDSPInstruction& opc);
 void asrnr(const UDSPInstruction& opc);
-void orf(const UDSPInstruction& opc);
 void add(const UDSPInstruction& opc);
 void addp(const UDSPInstruction& opc);
 void cmpis(const UDSPInstruction& opc);
@ -126,7 +127,7 @@ void lsrn(const UDSPInstruction& opc);
 void asrn(const UDSPInstruction& opc);
 void dar(const UDSPInstruction& opc);
 void iar(const UDSPInstruction& opc);
-void xar(const UDSPInstruction& opc);
+void subarn(const UDSPInstruction& opc);
 void sbclr(const UDSPInstruction& opc);
 void sbset(const UDSPInstruction& opc);
 void mov(const UDSPInstruction& opc);
@ -161,22 +162,12 @@ void andf(const UDSPInstruction& opc);
 void xori(const UDSPInstruction& opc);
 void andi(const UDSPInstruction& opc);
 void ori(const UDSPInstruction& opc);
+void srbith(const UDSPInstruction& opc);
 void mulaxh(const UDSPInstruction& opc);
 void tstprod(const UDSPInstruction& opc);

-// FIXME inside
-void srbith(const UDSPInstruction& opc);
-
-// END OF FIXMEs
-
-// TODO: PENDING IMPLEMENTATION / UNIMPLEMENTED
-void tstaxl(const UDSPInstruction& opc);
-// The mysterious a100
-
-// END OF UNIMPLEMENTED
-
-// Helpers
-inline void tsta(int reg);
+//mia
+void a100(const UDSPInstruction& opc);

 }  // namespace

--- a/Source/Core/DSPCore/Src/DSPTables.cpp
+++ b/Source/Core/DSPCore/Src/DSPTables.cpp
@ -31,18 +31,13 @@ void nop(const UDSPInstruction& opc)
 		DSPInterpreter::unknown(opc);
 }
 
-// Unknown Ops
-// All AX games: a100
-
-// TODO: Fill up the tables with the corresponding instructions
 const DSPOPCTemplate opcodes[] =
 {
-	{"NOP",		0x0000, 0xffff, nop, nop, 1, 0, {}, false},
+	{"NOP",		0x0000, 0xfffc, nop, nop, 1, 0, {}, false},

 	{"DAR",		0x0004, 0xfffc, DSPInterpreter::dar, nop, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false},
 	{"IAR",		0x0008, 0xfffc, DSPInterpreter::iar, nop, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false},
-	{"XAR",		0x000c, 0xfffc, DSPInterpreter::xar, nop, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false},
-
+	{"SUBARN",	0x000c, 0xfffc, DSPInterpreter::subarn, nop, 1, 1, {{P_REG, 1, 0, 0, 0x0003}}, false},
 	{"ADDARN",  0x0010, 0xfff0, DSPInterpreter::addarn, nop, 1, 2, {{P_REG, 1, 0, 0, 0x0003}, {P_REG04, 1, 0, 2, 0x000c}}, false},

 	{"HALT",	0x0021, 0xffff, DSPInterpreter::halt, nop, 1, 0, {}, false},
@ -120,8 +115,8 @@ const DSPOPCTemplate opcodes[] =
 	{"CALLRLZ",	0x171d, 0xff1f, DSPInterpreter::callr, nop, 1, 1, {{P_REG, 1, 0, 5, 0x00e0}}, false},
 	{"CALLR",	0x171f, 0xff1f, DSPInterpreter::callr, nop, 1, 1, {{P_REG, 1, 0, 5, 0x00e0}}, false},

-	{"SBCLR",   0x1200, 0xfff8, DSPInterpreter::sbclr, nop, 1, 1, {{P_IMM, 1, 0, 0, 0x0007}}, false},
-	{"SBSET",   0x1300, 0xfff8, DSPInterpreter::sbset, nop, 1, 1, {{P_IMM, 1, 0, 0, 0x0007}}, false},
+	{"SBCLR",   0x1200, 0xff00, DSPInterpreter::sbclr, nop, 1, 1, {{P_IMM, 1, 0, 0, 0x0007}}, false},
+	{"SBSET",   0x1300, 0xff00, DSPInterpreter::sbset, nop, 1, 1, {{P_IMM, 1, 0, 0, 0x0007}}, false},

 	{"LSL",		0x1400, 0xfec0, DSPInterpreter::lsl, nop, 1, 2, {{P_ACC, 1, 0, 8, 0x0100}, {P_IMM, 1, 0, 0, 0x003f}}, false},
 	{"LSR",		0x1440, 0xfec0, DSPInterpreter::lsr, nop, 1, 2, {{P_ACC, 1, 0, 8, 0x0100}, {P_IMM, 1, 0, 0, 0x003f}}, false},
@ -151,7 +146,6 @@ const DSPOPCTemplate opcodes[] =

 	{"ANDF",	0x02a0, 0xfeff, DSPInterpreter::andf, nop, 2, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_IMM, 2, 1, 0, 0xffff}}, false},
 	{"ANDCF",	0x02c0, 0xfeff, DSPInterpreter::andcf, nop, 2, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_IMM, 2, 1, 0, 0xffff}}, false},
-	{"ORF",		0x02e0, 0xfeff, DSPInterpreter::orf, nop, 2, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_IMM, 2, 1, 0, 0xffff}}, false}, // Hermes: ??? (has it commented out)

 	{"ILRR",    0x0210, 0xfefc, DSPInterpreter::ilrr,  nop, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false},
 	{"ILRRD",	0x0214, 0xfefc, DSPInterpreter::ilrrd, nop, 1, 2, {{P_ACCM, 1, 0, 8, 0x0100}, {P_PRG, 1, 0, 0, 0x0003}}, false}, // Hermes doesn't list this
@ -244,14 +238,13 @@ const DSPOPCTemplate opcodes[] =
 	{"MULAC",   0x9400, 0xf600, DSPInterpreter::mulac,  nop, 1 | P_EXT, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true},
 	{"MULMV",   0x9600, 0xf600, DSPInterpreter::mulmv,  nop, 1 | P_EXT, 3, {{P_REG18, 1, 0, 11, 0x0800}, {P_REG1A, 1, 0, 11, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true},
 	
-	//a-b (!!!retest TST/TSTAXL!!!)
+	//a-b (!figure out 0xa100/0xa900!)
 	{"MULX",    0xa000, 0xe700, DSPInterpreter::mulx,    nop, 1 | P_EXT, 2, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}}, true},
-	{"TST",		0xa100, 0xe700, DSPInterpreter::tst,   nop, 1 | P_EXT, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true},
-	//{"TSTAXL",  0xa100, 0xff00, DSPInterpreter::tstaxl, nop, 1 | P_EXT, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true}, //Definitely not TSTAXL, it affects one of the accumulators
-	//{"TST",		0xb100, 0xf700, DSPInterpreter::tst,   nop, 1 | P_EXT, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true},
+	{"a100",    0xa100, 0xf700, DSPInterpreter::a100,   nop, 1 | P_EXT, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true}, //Definitely not TSTAXL, it affects one of the accumulators
 	{"MULXMVZ", 0xa200, 0xe600, DSPInterpreter::mulxmvz, nop, 1 | P_EXT, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true},
 	{"MULXAC",  0xa400, 0xe600, DSPInterpreter::mulxac,  nop, 1 | P_EXT, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true},
 	{"MULXMV",  0xa600, 0xe600, DSPInterpreter::mulxmv,  nop, 1 | P_EXT, 3, {{P_REGM18, 1, 0, 11, 0x1000}, {P_REGM19, 1, 0, 10, 0x0800}, {P_ACC, 1, 0, 8, 0x0100}}, true},
+	{"TST",		0xb100, 0xf700, DSPInterpreter::tst,   nop, 1 | P_EXT, 1, {{P_ACC, 1, 0, 11, 0x0800}}, true},

 	//c-d
 	{"MULC",    0xc000, 0xe700, DSPInterpreter::mulc,    nop, 1 | P_EXT, 2, {{P_ACCM, 1, 0, 12, 0x1000}, {P_REG1A, 1, 0, 11, 0x0800}}, true},
--- a/Source/Core/DSPCore/Src/DSPTables.h
+++ b/Source/Core/DSPCore/Src/DSPTables.h
@ -155,6 +155,7 @@ const char *pdregnamelong(int val);
 void InitInstructionTable();
 void applyWriteBackLog();
 void zeroWriteBackLog();
+void zeroWriteBackLogPreserveAcc(u8 acc);

 inline void ExecuteInstruction(const UDSPInstruction& inst)
 {
--- a/Source/Core/DSPCore/Src/DspIntArithmetic.cpp
+++ b/Source/Core/DSPCore/Src/DspIntArithmetic.cpp
--- a/Source/Core/DSPCore/Src/DspIntMisc.cpp
+++ b/Source/Core/DSPCore/Src/DspIntMisc.cpp
@ -25,12 +25,6 @@

 namespace DSPInterpreter {

-void unknown(const UDSPInstruction& opc)
-{
-	//_assert_msg_(MASTER_LOG, !g_dsp.exception_in_progress_hack, "assert while exception");
-	ERROR_LOG(DSPLLE, "LLE: Unrecognized opcode 0x%04x, pc 0x%04x", opc.hex, g_dsp.pc);
-}
-
 // MRR $D, $S
 // 0001 11dd ddds ssss
 // Move value from register $S to register $D.
@ -50,7 +44,7 @@ void mrr(const UDSPInstruction& opc)
 // iiii iiii iiii iiii
 // Load immediate value I to register $D. 
 // FIXME: Perform additional operation depending on destination register.
-
+//
 // DSPSpy discovery: This, and possibly other instructions that load a
 // register, has a different behaviour in S40 mode if loaded to AC0.M: The
 // value gets sign extended to the whole accumulator! This does not happen in
@ -75,45 +69,21 @@ void lris(const UDSPInstruction& opc)
 	dsp_conditional_extend_accum(reg);
 }

-
-// TSTAXL $acR
-// 1000 r001 xxxx xxxx
-// r specifies one of the main accumulators.
-// Definitely not a test instruction - it changes the accums.
-// Not affected by m0/m2. Not affected by s16/s40.
-void tstaxl(const UDSPInstruction& opc)
-{
-	// This is probably all wrong.
-	//u8 reg  = (opc.hex >> 8) & 0x1;
-	//s16 val = dsp_get_ax_l(reg);
-	//Update_SR_Register16(val);
-}
-
-// ADDARN $arD, $ixS
-// 0000 0000 0001 ssdd
-// Adds indexing register $ixS to an addressing register $arD.
-void addarn(const UDSPInstruction& opc)
-{
-	u8 dreg = opc.hex & 0x3;
-	u8 sreg = (opc.hex >> 2) & 0x3;
-
-	g_dsp.r[dreg] = dsp_increase_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + sreg]);
-
-	// It is critical for the Zelda ucode that this one wraps correctly.
-}
+//----

 // NX
 // 1000 -000 xxxx xxxx
 // No operation, but can be extended with extended opcode.
+// This opcode is supposed to do nothing - it's used if you want to use
+// an opcode extension but not do anything. At least according to duddie.
 void nx(const UDSPInstruction& opc)
 {
 	zeroWriteBackLog();
-	// This opcode is supposed to do nothing - it's used if you want to use
-	// an opcode extension but not do anything. At least according to duddie.
 }

-//-------------------------------------------------------------
-// DAR $arD  ?
+//----
+
+// DAR $arD
 // 0000 0000 0000 01dd
 // Decrement address register $arD.
 void dar(const UDSPInstruction& opc)
@ -121,7 +91,7 @@ void dar(const UDSPInstruction& opc)
 	g_dsp.r[opc.hex & 0x3] = dsp_decrement_addr_reg(opc.hex & 0x3);
 }

-// IAR $arD  ?
+// IAR $arD
 // 0000 0000 0000 10dd
 // Increment address register $arD.
 void iar(const UDSPInstruction& opc)
@ -129,37 +99,49 @@ void iar(const UDSPInstruction& opc)
 	g_dsp.r[opc.hex & 0x3] = dsp_increment_addr_reg(opc.hex & 0x3);
 }

-// XAR $arD  ?
+// SUBARN $arD  
 // 0000 0000 0000 11dd
-// $arD result somehow depends on $wrD
-// unknown atm
-// used in IPL ucode
-void xar(const UDSPInstruction& opc)
+// Subtract indexing register $ixD from an addressing register $arD.
+// used only in IPL-NTSC ucode
+void subarn(const UDSPInstruction& opc)
 {
-//	u8 dreg = opc.hex & 0x3;
+	u8 dreg = opc.hex & 0x3;
+	g_dsp.r[dreg] = dsp_decrease_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + dreg]);
 }

+// ADDARN $arD, $ixS
+// 0000 0000 0001 ssdd
+// Adds indexing register $ixS to an addressing register $arD.
+// It is critical for the Zelda ucode that this one wraps correctly.
+void addarn(const UDSPInstruction& opc)
+{
+	u8 dreg = opc.hex & 0x3;
+	u8 sreg = (opc.hex >> 2) & 0x3;
+	g_dsp.r[dreg] = dsp_increase_addr_reg(dreg, (s16)g_dsp.r[DSP_REG_IX0 + sreg]);
+}
+
+//----
+
 // SBCLR #I
-// 0001 0011 0000 0iii
+// 0001 0011 aaaa aiii
 // bit of status register $sr. Bit number is calculated by adding 6 to
 // immediate value I.
 void sbclr(const UDSPInstruction& opc)
 {
-	u8 bit = (opc.hex & 0xff) + 6;
+	u8 bit = (opc.hex & 0x7) + 6;
 	g_dsp.r[DSP_REG_SR] &= ~(1 << bit);
 }

 // SBSET #I
-// 0001 0010 0000 0iii
+// 0001 0011 aaaa aiii
 // Set bit of status register $sr. Bit number is calculated by adding 6 to
 // immediate value I.
 void sbset(const UDSPInstruction& opc)
 {
-	u8 bit = (opc.hex & 0xff) + 6;
+	u8 bit = (opc.hex & 0x7) + 6;
 	g_dsp.r[DSP_REG_SR] |= (1 << bit);
 }

-
 // This is a bunch of flag setters, flipping bits in SR. So far so good,
 // but it's harder to know exactly what effect they have.
 void srbith(const UDSPInstruction& opc)
@ -199,4 +181,11 @@ void srbith(const UDSPInstruction& opc)
 	}
 }

+//----
+
+void unknown(const UDSPInstruction& opc)
+{
+	ERROR_LOG(DSPLLE, "LLE: Unrecognized opcode 0x%04x, pc 0x%04x", opc.hex, g_dsp.pc);
+}
+
 }  // namespace
--- a/Source/Core/DSPCore/Src/DspIntMultiplier.cpp
+++ b/Source/Core/DSPCore/Src/DspIntMultiplier.cpp
@ -46,35 +46,25 @@ inline s64 dsp_get_multiply_prod(u16 a, u16 b, bool sign)
 	return prod;
 }
 	
-// Sets prod as a side effect.
 s64 dsp_multiply(u16 a, u16 b, bool sign = false)
 {
 	s64 prod = dsp_get_multiply_prod(a, b, sign);
-	
-	// Store the product, and return it, in case the caller wants to read it.
-	//	dsp_set_long_prod(prod);
 	return prod;
 }

 s64 dsp_multiply_add(u16 a, u16 b, bool sign = false)
 {
-	s64 prod = dsp_get_multiply_prod(a, b, sign) + dsp_get_long_prod();
-
-	// Store the product, and return it, in case the caller wants to read it.
-	//	dsp_set_long_prod(prod);
+	s64 prod = dsp_get_long_prod() + dsp_get_multiply_prod(a, b, sign);
 	return prod;
 }

 s64 dsp_multiply_sub(u16 a, u16 b, bool sign = false)
 {
 	s64 prod = dsp_get_long_prod() -  dsp_get_multiply_prod(a, b, sign);
-
-	// Store the product, and return it, in case the caller wants to read it.
-	//	dsp_set_long_prod(prod);
 	return prod;
 }

-
+//----

 // CLRP
 // 1000 0100 xxxx xxxx
@ -91,40 +81,87 @@ void clrp(const UDSPInstruction& opc)
 	// 00ff_(fff0 + 0010)_0000 = 0100_0000_0000, conveniently, lower 40bits = 0
 }

+// TSTPROD
+// 1000 0101 xxxx xxxx
+// Test prod regs value.
+//
+// flags out: xx xx0x <- CF??
+void tstprod(const UDSPInstruction& opc)
+{
+	s64 prod = dsp_get_long_prod();
+	Update_SR_Register64(prod);
+	zeroWriteBackLog();
+}
+
+//----
+
 // MOVP $acD
 // 0110 111d xxxx xxxx
 // Moves multiply product from $prod register to accumulator $acD register.
+//
+// flags out: xx xx00
 void movp(const UDSPInstruction& opc)
 {
 	u8 dreg = (opc.hex >> 8) & 0x1;

-	s64 prod = dsp_get_long_prod();
-	zeroWriteBackLog();
-	dsp_set_long_acc(dreg, prod);
+	s64 acc = dsp_get_long_prod();

-	Update_SR_Register64(prod);
+	zeroWriteBackLog();
+
+	dsp_set_long_acc(dreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(acc);
+#endif
 }

 // MOVNP $acD
 // 0111 111d xxxx xxxx 
 // Moves negative of multiply product from $prod register to accumulator
 // $acD register.
+//
+// flags out: xx xx0x <- CF??
 void movnp(const UDSPInstruction& opc)
 {
 	u8 dreg = (opc.hex >> 8) & 0x1;

-	s64 prod = dsp_get_long_prod();
-	s64 acc = -prod;
+	s64 acc = -dsp_get_long_prod();
+
 	zeroWriteBackLog();
+
 	dsp_set_long_acc(dreg, acc);
-	
+#ifdef PRECISE_SR_FLAGS
 	Update_SR_Register64(acc);
+#endif
+}
+
+// MOVPZ $acD
+// 1111 111d xxxx xxxx
+// Moves multiply product from $prod register to accumulator $acD
+// register and sets $acD.l to 0
+//
+// flags out: xx xx0x <- CF??
+void movpz(const UDSPInstruction& opc)
+{
+	u8 dreg = (opc.hex >> 8) & 0x01;
+
+	s64 acc = dsp_get_long_prod() & ~0xffff;
+
+	zeroWriteBackLog();
+
+	dsp_set_long_acc(dreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(acc);
+#endif
 }

 // ADDPAXZ $acD, $axS
 // 1111 10sd xxxx xxxx
 // Adds secondary accumulator $axS to product register and stores result
 // in accumulator register. Low 16-bits of $acD ($acD.l) are set to 0.
+//
+// flags out: x-xx xxxx
+//
+// TEST THIS!!
 void addpaxz(const UDSPInstruction& opc)
 {
 	u8 dreg = (opc.hex >> 8) & 0x1;
@ -135,28 +172,15 @@ void addpaxz(const UDSPInstruction& opc)
 	s64 acc = (prod + ax) & ~0xffff;

 	zeroWriteBackLog();
-	dsp_set_long_acc(dreg, acc);

-	Update_SR_Register64(acc);
+	dsp_set_long_acc(dreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	acc = dsp_get_long_acc(dreg);
+	Update_SR_Register64(acc, isCarry2(prod, acc), isOverflow(prod, ax, acc));
+#endif
 }

-// MOVPZ $acD
-// 1111 111d xxxx xxxx
-// Moves multiply product from $prod register to accumulator $acD
-// register and sets $acD.l to 0
-void movpz(const UDSPInstruction& opc)
-{
-	u8 dreg = (opc.hex >> 8) & 0x01;
-
-	// overwrite acc and clear low part
-	s64 prod = dsp_get_long_prod();
-	s64 acc = prod & ~0xffff;
-
-	zeroWriteBackLog();
-	dsp_set_long_acc(dreg, acc);
-
-	Update_SR_Register64(acc);
-}
+//----

 // MULAXH
 // 1000 0011 xxxx xxxx
@ -164,28 +188,223 @@ void movpz(const UDSPInstruction& opc)
 void mulaxh(const UDSPInstruction& opc)
 {
 	s64 prod = dsp_multiply(dsp_get_ax_h(0), dsp_get_ax_h(0));
+
 	zeroWriteBackLog();
+	
 	dsp_set_long_prod(prod);		
 }

-// TSTPROD
-// 1000 0101 xxxx xxxx
-// Test prod regs value.
-void tstprod(const UDSPInstruction& opc)
+//----
+
+// MUL $axS.l, $axS.h
+// 1001 s000 xxxx xxxx
+// Multiply low part $axS.l of secondary accumulator $axS by high part
+// $axS.h of secondary accumulator $axS (treat them both as signed).
+void mul(const UDSPInstruction& opc)
 {
-	s64 prod = dsp_get_long_prod();
-	Update_SR_Register64(prod);
+	u8 sreg  = (opc.hex >> 11) & 0x1;
+
+	u16 axl = dsp_get_ax_l(sreg);
+	u16 axh = dsp_get_ax_h(sreg);
+	s64 prod = dsp_multiply(axh, axl);
+	
 	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
 }

+// MULAC $axS.l, $axS.h, $acR
+// 1001 s10r xxxx xxxx
+// Add product register to accumulator register $acR. Multiply low part
+// $axS.l of secondary accumulator $axS by high part $axS.h of secondary
+// accumulator $axS (treat them both as signed).
+//
+// flags out: xx xx00
+void mulac(const UDSPInstruction& opc)
+{
+	u8 rreg = (opc.hex >> 8) & 0x1;
+	u8 sreg = (opc.hex >> 11) & 0x1;
+
+	s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod();
+	u16 axl = dsp_get_ax_l(sreg);
+	u16 axh = dsp_get_ax_h(sreg);
+	s64 prod = dsp_multiply(axl, axh);
+												
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+// MULMV $axS.l, $axS.h, $acR
+// 1001 s11r xxxx xxxx
+// Move product register to accumulator register $acR. Multiply low part
+// $axS.l of secondary accumulator $axS by high part $axS.h of secondary
+// accumulator $axS (treat them both as signed).
+//
+// flags out: xx xx00
+void mulmv(const UDSPInstruction& opc)
+{
+	u8 rreg  = (opc.hex >> 8) & 0x1;
+	u8 sreg  = ((opc.hex >> 11) & 0x1);
+
+	s64 acc = dsp_get_long_prod();
+	u16 axl = dsp_get_ax_l(sreg);
+	u16 axh = dsp_get_ax_h(sreg);
+	s64 prod = dsp_multiply(axl, axh);
+												
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+// MULMVZ $axS.l, $axS.h, $acR
+// 1001 s01r xxxx xxxx
+// Move product register to accumulator register $acR and clear low part
+// of accumulator register $acR.l. Multiply low part $axS.l of secondary
+// accumulator $axS by high part $axS.h of secondary accumulator $axS (treat
+// them both as signed).
+//
+// flags out: xx xx00
+void mulmvz(const UDSPInstruction& opc)
+{
+	u8 rreg = (opc.hex >> 8) & 0x1;
+	u8 sreg = (opc.hex >> 11) & 0x1;
+
+	s64 acc = dsp_get_long_prod() & ~0xffff;
+	u16 axl = dsp_get_ax_l(sreg);
+	u16 axh = dsp_get_ax_h(sreg);
+	s64 prod = dsp_multiply(axl, axh);
+	
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+//----
+
+// MULX $ax0.S, $ax1.T
+// 101s t000 xxxx xxxx
+// Multiply one part $ax0 by one part $ax1 (treat them both as signed).
+// Part is selected by S and T bits. Zero selects low part, one selects high part.
+void mulx(const UDSPInstruction& opc)
+{
+	u8 treg = ((opc.hex >> 11) & 0x1);
+	u8 sreg = ((opc.hex >> 12) & 0x1);
+
+	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
+	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
+	s64 prod = dsp_multiply(val1, val2, true);
+	
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+}
+
+// MULXAC $ax0.S, $ax1.T, $acR
+// 101s t10r xxxx xxxx
+// Add product register to accumulator register $acR. Multiply one part
+// $ax0 by one part $ax1 (treat them both as signed). Part is selected by S and
+// T bits. Zero selects low part, one selects high part.
+//
+// flags out: xx xx00
+void mulxac(const UDSPInstruction& opc)
+{
+	u8 rreg = (opc.hex >> 8) & 0x1;
+	u8 treg = (opc.hex >> 11) & 0x1;
+	u8 sreg = (opc.hex >> 12) & 0x1;
+
+	s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod();
+	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
+	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
+	s64 prod = dsp_multiply(val1, val2, true);
+	
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+// MULXMV $ax0.S, $ax1.T, $acR
+// 101s t11r xxxx xxxx
+// Move product register to accumulator register $acR. Multiply one part
+// $ax0 by one part $ax1 (treat them both as signed). Part is selected by S and
+// T bits. Zero selects low part, one selects high part.
+//
+// flags out: xx xx00
+void mulxmv(const UDSPInstruction& opc)
+{
+	u8 rreg = ((opc.hex >> 8) & 0x1);
+	u8 treg = (opc.hex >> 11) & 0x1;
+	u8 sreg = (opc.hex >> 12) & 0x1;
+
+	s64 acc = dsp_get_long_prod();
+	s16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
+	s16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
+	s64 prod = dsp_multiply(val1, val2, true);
+
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+// MULXMVZ $ax0.S, $ax1.T, $acR
+// 101s t01r xxxx xxxx
+// Move product register to accumulator register $acR and clear low part
+// of accumulator register $acR.l. Multiply one part $ax0 by one part $ax1 (treat
+// them both as signed). Part is selected by S and T bits. Zero selects low part,
+// one selects high part.
+//
+// flags out: xx xx00
+void mulxmvz(const UDSPInstruction& opc)
+{
+	u8 rreg  = (opc.hex >> 8) & 0x1;
+	u8 treg = (opc.hex >> 11) & 0x1;
+	u8 sreg = (opc.hex >> 12) & 0x1;
+
+	s64 acc = dsp_get_long_prod() & ~0xffff;
+	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
+	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
+	s64 prod = dsp_multiply(val1, val2, true);
+
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+//----
+
 // MULC $acS.m, $axT.h
 // 110s t000 xxxx xxxx
 // Multiply mid part of accumulator register $acS.m by high part $axS.h of
 // secondary accumulator $axS (treat them both as signed).
 void mulc(const UDSPInstruction& opc)
 {
-	u8 sreg = (opc.hex >> 12) & 0x1;
 	u8 treg = (opc.hex >> 11) & 0x1;
+	u8 sreg = (opc.hex >> 12) & 0x1;
+
 	u16 accm = dsp_get_acc_m(sreg);
 	u16 axh = dsp_get_ax_h(treg);
 	s64 prod = dsp_multiply(accm, axh);
@ -193,7 +412,61 @@ void mulc(const UDSPInstruction& opc)
 	zeroWriteBackLog();

 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
+}
+
+// MULCAC $acS.m, $axT.h, $acR
+// 110s	t10r xxxx xxxx
+// Multiply mid part of accumulator register $acS.m by high part $axT.h of
+// secondary accumulator $axT  (treat them both as signed). Add product
+// register before multiplication to accumulator $acR.
+//
+// flags out: xx xx00
+void mulcac(const UDSPInstruction& opc)
+{
+	u8 rreg = (opc.hex >> 8) & 0x1;
+	u8 treg  = (opc.hex >> 11) & 0x1;
+	u8 sreg  = (opc.hex >> 12) & 0x1;
+
+	s64 acc = dsp_get_long_acc(rreg) + dsp_get_long_prod();
+	u16 accm = dsp_get_acc_m(sreg);
+	u16 axh = dsp_get_ax_h(treg);
+	s64 prod = dsp_multiply(accm, axh);
+	
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
+}
+
+// MULCMV $acS.m, $axT.h, $acR
+// 110s t11r xxxx xxxx
+// Multiply mid part of accumulator register $acS.m by high part $axT.h of
+// secondary accumulator $axT  (treat them both as signed). Move product
+// register before multiplication to accumulator $acR.
+// possible mistake in duddie's doc axT.h rather than axS.h
+//
+// flags out: xx xx00
+void mulcmv(const UDSPInstruction& opc)
+{
+	u8 rreg = (opc.hex >> 8) & 0x1;
+	u8 treg  = (opc.hex >> 11) & 0x1;
+	u8 sreg  = (opc.hex >> 12) & 0x1;
+
+	s64 acc = dsp_get_long_prod();
+	u16 accm = dsp_get_acc_m(sreg);
+	u16 axh = dsp_get_ax_h(treg);
+	s64 prod = dsp_multiply(accm, axh);
+	
+	zeroWriteBackLog();
+
+	dsp_set_long_prod(prod);
+	dsp_set_long_acc(rreg, acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
 }

 // MULCMVZ $acS.m, $axT.h, $acR
@ -203,265 +476,29 @@ void mulc(const UDSPInstruction& opc)
 // secondary accumulator $axT  (treat them both as signed). Move product
 // register before multiplication to accumulator $acR, set low part of 
 // accumulator $acR.l to zero.
+//
+// flags out: xx xx00
 void mulcmvz(const UDSPInstruction& opc)
 {
-	s64 TempProd = dsp_get_long_prod();
-
-	// update prod
-	u8 sreg  = (opc.hex >> 12) & 0x1;
+	u8 rreg = (opc.hex >> 8) & 0x1;
 	u8 treg  = (opc.hex >> 11) & 0x1;
+	u8 sreg  = (opc.hex >> 12) & 0x1;

+	s64 acc = dsp_get_long_prod() & ~0xffff;
 	u16 accm = dsp_get_acc_m(sreg);
 	u16 axh = dsp_get_ax_h(treg);
 	s64 prod = dsp_multiply(accm, axh);
 											
 	zeroWriteBackLog();
-	dsp_set_long_prod(prod);

-	// update acc
-	u8 rreg = (opc.hex >> 8) & 0x1;
-	s64 acc = TempProd & ~0xffff; // clear lower 4 bytes
+	dsp_set_long_prod(prod);
 	dsp_set_long_acc(rreg, acc);
-
-	Update_SR_Register64(acc);
+#ifdef PRECISE_SR_FLAGS
+	Update_SR_Register64(dsp_get_long_acc(rreg));
+#endif
 }

-// MULCMV $acS.m, $axT.h, $acR
-// 110s t11r xxxx xxxx
-// Multiply mid part of accumulator register $acS.m by high part $axT.h of
-// secondary accumulator $axT  (treat them both as signed). Move product
-// register before multiplication to accumulator $acR.
-// possible mistake in duddie's doc axT.h rather than axS.h
-void mulcmv(const UDSPInstruction& opc)
-{
-	s64 old_prod = dsp_get_long_prod();
-
-	// update prod
-	u8 sreg  = (opc.hex >> 12) & 0x1;
-	u8 treg  = (opc.hex >> 11) & 0x1;
-	u16 accm = dsp_get_acc_m(sreg);
-	u16 axh = dsp_get_ax_h(treg);
-	u8 rreg = (opc.hex >> 8) & 0x1;
-	s64 prod = dsp_multiply(accm, axh);
-	
-	zeroWriteBackLog();
-
-	dsp_set_long_prod(prod);
-	// update acc
-	dsp_set_long_acc(rreg, old_prod);
-
-	Update_SR_Register64(old_prod);
-}
-
-// MULCAC $acS.m, $axT.h, $acR
-// 110s	t10r xxxx xxxx
-// Multiply mid part of accumulator register $acS.m by high part $axS.h of
-// secondary accumulator $axS  (treat them both as signed). Add product
-// register before multiplication to accumulator $acR.
-void mulcac(const UDSPInstruction& opc)
-{
-	s64 old_prod = dsp_get_long_prod();
-
-	// update prod
-	u8 sreg  = (opc.hex >> 12) & 0x1;
-	u8 treg  = (opc.hex >> 11) & 0x1;
-	u16 accm = dsp_get_acc_m(sreg);
-	u16 axh = dsp_get_ax_h(treg);
-	s64 prod = dsp_multiply(accm, axh);
-	u8 rreg = (opc.hex >> 8) & 0x1;
-	s64 acc = old_prod + dsp_get_long_acc(rreg);
-	
-	zeroWriteBackLog();
-	dsp_set_long_prod(prod);
-	// update acc
-	dsp_set_long_acc(rreg, acc);
-
-	Update_SR_Register64(acc);
-}
-
-
-// MUL $axS.l, $axS.h
-// 1001 s000 xxxx xxxx
-// Multiply low part $axS.l of secondary accumulator $axS by high part
-// $axS.h of secondary accumulator $axS (treat them both as signed).
-void mul(const UDSPInstruction& opc)
-{
-	u8 sreg  = (opc.hex >> 11) & 0x1;
-	u16 axl = dsp_get_ax_l(sreg);
-	u16 axh = dsp_get_ax_h(sreg);
-	s64 prod = dsp_multiply(axh, axl);
-	
-	zeroWriteBackLog();
-	
-	dsp_set_long_prod(prod);
-	// FIXME: no update in duddie's docs
-	Update_SR_Register64(prod);
-}
-
-// MULAC $axS.l, $axS.h, $acR
-// 1001 s10r xxxx xxxx
-// Add product register to accumulator register $acR. Multiply low part
-// $axS.l of secondary accumulator $axS by high part $axS.h of secondary
-// accumulator $axS (treat them both as signed).
-void mulac(const UDSPInstruction& opc)
-{
-	// add old prod to acc
-	u8 rreg = (opc.hex >> 8) & 0x1;
-	u8 sreg = (opc.hex >> 11) & 0x1;
-
-	s64 acR = dsp_get_long_acc(rreg) + dsp_get_long_prod();
-	u16 axl = dsp_get_ax_l(sreg);
-	u16 axh = dsp_get_ax_h(sreg);
-	s64 prod = dsp_multiply(axl, axh);
-												
-	zeroWriteBackLog();
-
-	dsp_set_long_acc(rreg, acR);
-	dsp_set_long_prod(prod);
-
-	// FIXME: no update in duddie's docs
-	Update_SR_Register64(prod);
-}
-
-// MULMV $axS.l, $axS.h, $acR
-// 1001 s11r xxxx xxxx
-// Move product register to accumulator register $acR. Multiply low part
-// $axS.l of secondary accumulator $axS by high part $axS.h of secondary
-// accumulator $axS (treat them both as signed).
-void mulmv(const UDSPInstruction& opc)
-{
-	u8 rreg  = (opc.hex >> 8) & 0x1;
-	u8 sreg  = ((opc.hex >> 11) & 0x1);
-	s64 acc = dsp_get_long_prod();
-	u16 axl = dsp_get_ax_l(sreg);
-	u16 axh = dsp_get_ax_h(sreg);
-	s64 prod = dsp_multiply(axl, axh);
-												
-	zeroWriteBackLog();
-	dsp_set_long_acc(rreg, acc);
-	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
-}
-
-// MULMVZ $axS.l, $axS.h, $acR
-// 1001 s01r xxxx xxxx
-// Move product register to accumulator register $acR and clear low part
-// of accumulator register $acR.l. Multiply low part $axS.l of secondary
-// accumulator $axS by high part $axS.h of secondary accumulator $axS (treat
-// them both as signed).
-void mulmvz(const UDSPInstruction& opc)
-{
-	u8 sreg = (opc.hex >> 11) & 0x1;
-	u8 rreg = (opc.hex >> 8) & 0x1;
-
-	// overwrite acc and clear low part
-	s64 acc = dsp_get_long_prod() & ~0xffff;
-	u16 axl = dsp_get_ax_l(sreg);
-	u16 axh = dsp_get_ax_h(sreg);
-	s64 prod = dsp_multiply(axl, axh);
-	
-	zeroWriteBackLog();
-
-	dsp_set_long_acc(rreg, acc);
-	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
-}
-
-// MULX $ax0.S, $ax1.T
-// 101s t000 xxxx xxxx
-// Multiply one part $ax0 by one part $ax1 (treat them both as signed).
-// Part is selected by S and T bits. Zero selects low part, one selects high part.
-void mulx(const UDSPInstruction& opc)
-{
-	u8 sreg = ((opc.hex >> 12) & 0x1);
-	u8 treg = ((opc.hex >> 11) & 0x1);
-
-	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
-	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
-	s64 prod = dsp_multiply(val1, val2, true);
-	
-	zeroWriteBackLog();
-	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
-}
-
-// MULXAC $ax0.S, $ax1.T, $acR
-// 101s t01r xxxx xxxx
-// Add product register to accumulator register $acR. Multiply one part
-// $ax0 by one part $ax1 (treat them both as signed). Part is selected by S and
-// T bits. Zero selects low part, one selects high part.
-void mulxac(const UDSPInstruction& opc)
-{
-	// add old prod to acc
-	u8 rreg = (opc.hex >> 8) & 0x1;
-	s64 acR = dsp_get_long_acc(rreg) + dsp_get_long_prod();
-
-	// math new prod
-	u8 sreg = (opc.hex >> 12) & 0x1;
-	u8 treg = (opc.hex >> 11) & 0x1;
-
-	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
-	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
-		
-	s64 prod = dsp_multiply(val1, val2, true);
-	
-	zeroWriteBackLog();
-	dsp_set_long_acc(rreg, acR);
-	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
-}
-
-// MULXMV $ax0.S, $ax1.T, $acR
-// 101s t11r xxxx xxxx
-// Move product register to accumulator register $acR. Multiply one part
-// $ax0 by one part $ax1 (treat them both as signed). Part is selected by S and
-// T bits. Zero selects low part, one selects high part.
-void mulxmv(const UDSPInstruction& opc)
-{
-	// add old prod to acc
-	u8 rreg = ((opc.hex >> 8) & 0x1);
-	s64 acR = dsp_get_long_prod();
-
-	// math new prod
-	u8 sreg = (opc.hex >> 12) & 0x1;
-	u8 treg = (opc.hex >> 11) & 0x1;
-
-	s16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
-	s16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
-	s64 prod = dsp_multiply(val1, val2, true);
-
-	zeroWriteBackLog();
-	dsp_set_long_acc(rreg, acR);
-	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
-}
-
-// MULXMV $ax0.S, $ax1.T, $acR
-// 101s t01r xxxx xxxx
-// Move product register to accumulator register $acR and clear low part
-// of accumulator register $acR.l. Multiply one part $ax0 by one part $ax1 (treat
-// them both as signed). Part is selected by S and T bits. Zero selects low part,
-// one selects high part.
-void mulxmvz(const UDSPInstruction& opc)
-{
-	// overwrite acc and clear low part
-	u8 rreg  = (opc.hex >> 8) & 0x1;
-	s64 acc = dsp_get_long_prod() & ~0xffff;
-
-	// math prod
-	u8 sreg = (opc.hex >> 12) & 0x1;
-	u8 treg = (opc.hex >> 11) & 0x1;
-
-	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
-	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
-	s64 prod = dsp_multiply(val1, val2, true);
-
-	zeroWriteBackLog();
-
-	dsp_set_long_acc(rreg, acc);
-	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
-}
+//----

 // MADDX ax0.S ax1.T
 // 1110 00st xxxx xxxx
@ -470,16 +507,16 @@ void mulxmvz(const UDSPInstruction& opc)
 // signed) and add result to product register.
 void maddx(const UDSPInstruction& opc)
 {
-	u8 sreg = (opc.hex >> 9) & 0x1;
 	u8 treg = (opc.hex >> 8) & 0x1;
+	u8 sreg = (opc.hex >> 9) & 0x1;

 	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
 	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
 	s64 prod = dsp_multiply_add(val1, val2);
 	
 	zeroWriteBackLog();
+
 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
 }

 // MSUBX $(0x18+S*2), $(0x19+T*2)
@ -489,16 +526,16 @@ void maddx(const UDSPInstruction& opc)
 // signed) and subtract result from product register.
 void msubx(const UDSPInstruction& opc)
 {
-	u8 sreg = (opc.hex >> 9) & 0x1;
 	u8 treg = (opc.hex >> 8) & 0x1;
+	u8 sreg = (opc.hex >> 9) & 0x1;

 	u16 val1 = (sreg == 0) ? dsp_get_ax_l(0) : dsp_get_ax_h(0);
 	u16 val2 = (treg == 0) ? dsp_get_ax_l(1) : dsp_get_ax_h(1);
 	s64 prod = dsp_multiply_sub(val1, val2);

 	zeroWriteBackLog();
+
 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
 }

 // MADDC $acS.m, $axT.h
@ -508,8 +545,9 @@ void msubx(const UDSPInstruction& opc)
 // register.
 void maddc(const UDSPInstruction& opc)
 {
-	u32 sreg = (opc.hex >> 9) & 0x1;
-	u32 treg = (opc.hex >> 8) & 0x1;
+	u8 treg = (opc.hex >> 8) & 0x1;
+	u8 sreg = (opc.hex >> 9) & 0x1;
+
 	u16 accm = dsp_get_acc_m(sreg);
 	u16 axh = dsp_get_ax_h(treg);
 	s64 prod = dsp_multiply_add(accm, axh);
@ -517,7 +555,6 @@ void maddc(const UDSPInstruction& opc)
 	zeroWriteBackLog();

 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
 }

 // MSUBC $acS.m, $axT.h
@ -527,15 +564,16 @@ void maddc(const UDSPInstruction& opc)
 // product register.
 void msubc(const UDSPInstruction& opc)
 {
-	u32 sreg = (opc.hex >> 9) & 0x1;
-	u32 treg = (opc.hex >> 8) & 0x1;
+	u8 treg = (opc.hex >> 8) & 0x1;
+	u8 sreg = (opc.hex >> 9) & 0x1;
+	
 	u16 accm = dsp_get_acc_m(sreg);
 	u16 axh = dsp_get_ax_h(treg);
 	s64 prod = dsp_multiply_sub(accm, axh);

 	zeroWriteBackLog();
+
 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
 }

 // MADD $axS.l, $axS.h
@ -546,13 +584,14 @@ void msubc(const UDSPInstruction& opc)
 void madd(const UDSPInstruction& opc)
 {
 	u8 sreg = (opc.hex >> 8) & 0x1;
+	
 	u16 axl = dsp_get_ax_l(sreg);
 	u16 axh = dsp_get_ax_h(sreg);
 	s64 prod = dsp_multiply_add(axl, axh);
 												
 	zeroWriteBackLog();
+
 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
 }

 // MSUB $axS.l, $axS.h
@ -563,13 +602,14 @@ void madd(const UDSPInstruction& opc)
 void msub(const UDSPInstruction& opc)
 {
 	u8 sreg = (opc.hex >> 8) & 0x1;
+	
 	u16 axl = dsp_get_ax_l(sreg);
 	u16 axh = dsp_get_ax_h(sreg);
 	s64 prod = dsp_multiply_sub(axl, axh);
 												
 	zeroWriteBackLog();
+
 	dsp_set_long_prod(prod);
-	Update_SR_Register64(prod);
 }

 }  // namespace