mirror of
https://github.com/mupen64plus-ae/mupen64plus-rsp-cxd4.git
synced 2024-11-23 13:40:01 +00:00
completely vectorized all accumulator R/W
This commit is contained in:
parent
cd5c32e7af
commit
49bd94cd9f
2
rsp.h
2
rsp.h
@ -251,7 +251,7 @@ void trace_RSP_registers(void)
|
||||
for (i = 0; i < 8; i++)
|
||||
fprintf(
|
||||
out, "ACC[%o]: [%04X][%04X][%04X]\n", i,
|
||||
VACC[i].s[HI], VACC[i].s[MD], VACC[i].s[LO]);
|
||||
ACC_H(i), ACC_M(i), ACC_L(i));
|
||||
fprintf(out, "\n");
|
||||
fprintf(out, "DivIn: %i\n", DivIn);
|
||||
fprintf(out, "DivOut: %i\n", DivOut);
|
||||
|
89
vu/vabs.h
89
vu/vabs.h
@ -1,34 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VABS(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
#ifdef FORCE_STATIC_CLAMP
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
register signed short ti;
|
||||
|
||||
ti = VR_T(i);
|
||||
ti ^= -(VR[vs][i] < 0); /* ti = ~ti */
|
||||
ti += (VR[vs][i] < 0) & (ti != 0x7FFF); /* abs(-32768) == +32767 */
|
||||
ti &= -(VR[vs][i] != 0);
|
||||
ACC_R(i) = ti;
|
||||
}
|
||||
#else
|
||||
for (i = 0; i < N; i++)
|
||||
if (VR[vs][i] < 0)
|
||||
ACC_R(i) = -(VR_T(i) ^ (VR_T(i) == -32768));
|
||||
else if (VR[vs][i] == 0)
|
||||
ACC_R(i) = 0x0000;
|
||||
else
|
||||
ACC_R(i) = +VR_T(i);
|
||||
#endif
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* -1: VT *= -1, because VS < 0 // VT ^= -2 if even, or ^= -1, += 1
|
||||
* 0: VT *= 0, because VS = 0 // VT ^= VT
|
||||
@ -89,9 +60,9 @@ static void VABS_v(void)
|
||||
result[i] = VR[vt][i];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS0q(void)
|
||||
@ -105,9 +76,9 @@ static void VABS0q(void)
|
||||
result[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS1q(void)
|
||||
@ -121,9 +92,9 @@ static void VABS1q(void)
|
||||
result[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS0h(void)
|
||||
@ -137,9 +108,9 @@ static void VABS0h(void)
|
||||
result[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS1h(void)
|
||||
@ -153,9 +124,9 @@ static void VABS1h(void)
|
||||
result[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS2h(void)
|
||||
@ -169,9 +140,9 @@ static void VABS2h(void)
|
||||
result[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS3h(void)
|
||||
@ -185,9 +156,9 @@ static void VABS3h(void)
|
||||
result[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS0w(void)
|
||||
@ -201,9 +172,9 @@ static void VABS0w(void)
|
||||
result[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS1w(void)
|
||||
@ -217,9 +188,9 @@ static void VABS1w(void)
|
||||
result[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS2w(void)
|
||||
@ -233,9 +204,9 @@ static void VABS2w(void)
|
||||
result[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS3w(void)
|
||||
@ -249,9 +220,9 @@ static void VABS3w(void)
|
||||
result[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS4w(void)
|
||||
@ -265,9 +236,9 @@ static void VABS4w(void)
|
||||
result[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS5w(void)
|
||||
@ -281,9 +252,9 @@ static void VABS5w(void)
|
||||
result[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS6w(void)
|
||||
@ -297,9 +268,9 @@ static void VABS6w(void)
|
||||
result[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VABS7w(void)
|
||||
@ -313,8 +284,8 @@ static void VABS7w(void)
|
||||
result[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_abs(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
48
vu/vadd.h
48
vu/vadd.h
@ -1,23 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VADD(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++) /* Try to vectorize the adds to be parallel. */
|
||||
result[i] = VR[vs][i] + VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
result[i] += VCO & 0x0001;
|
||||
VCO >>= 1;
|
||||
}
|
||||
VCO = 0x0000; /* Clear the remaining, upper NOTEQUAL bits. */
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)result[i];
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
|
||||
void clr_ci(void) /* clear CARRY and carry in to accumulators */
|
||||
{
|
||||
int ci[8];
|
||||
@ -41,7 +23,7 @@ static void VADD_v(void)
|
||||
result[i] = VR[vs][i] + VR[vt][i];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -56,7 +38,7 @@ static void VADD0q(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -71,7 +53,7 @@ static void VADD1q(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -86,7 +68,7 @@ static void VADD0h(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -101,7 +83,7 @@ static void VADD1h(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -116,7 +98,7 @@ static void VADD2h(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -131,7 +113,7 @@ static void VADD3h(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -146,7 +128,7 @@ static void VADD0w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -161,7 +143,7 @@ static void VADD1w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -176,7 +158,7 @@ static void VADD2w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -191,7 +173,7 @@ static void VADD3w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -206,7 +188,7 @@ static void VADD4w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -221,7 +203,7 @@ static void VADD5w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -236,7 +218,7 @@ static void VADD6w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -251,7 +233,7 @@ static void VADD7w(void)
|
||||
result[i] = VR[vs][i] + VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
clr_ci();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
|
76
vu/vaddc.h
76
vu/vaddc.h
@ -1,21 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VADDC(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
VCO = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = (unsigned short)VR[vs][i] + (unsigned short)VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = (short)result[i];
|
||||
for (i = 0; i < N; i++)
|
||||
VCO |= !!(result[i] & ~0x0000FFFF) << i;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
#if (0)
|
||||
#define SETCO(i) (result[i] > 0x0000FFFF)
|
||||
#elif (1)
|
||||
@ -54,9 +38,9 @@ static void VADDC_v(void)
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = (unsigned short)(VR[vs][i]) + (unsigned short)(VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -72,9 +56,9 @@ static void VADDC0q(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -90,9 +74,9 @@ static void VADDC1q(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -108,9 +92,9 @@ static void VADDC0h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -126,9 +110,9 @@ static void VADDC1h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -144,9 +128,9 @@ static void VADDC2h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -162,9 +146,9 @@ static void VADDC3h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -180,9 +164,9 @@ static void VADDC0w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -198,9 +182,9 @@ static void VADDC1w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -216,9 +200,9 @@ static void VADDC2w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -234,9 +218,9 @@ static void VADDC3w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -252,9 +236,9 @@ static void VADDC4w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -270,9 +254,9 @@ static void VADDC5w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -288,9 +272,9 @@ static void VADDC6w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
@ -306,9 +290,9 @@ static void VADDC7w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
+ (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_co();
|
||||
return;
|
||||
}
|
||||
|
71
vu/vand.h
71
vu/vand.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VAND(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = VR[vs][i] & VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VAND_v(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,9 +8,9 @@ static void VAND_v(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][i];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND0q(void)
|
||||
@ -32,9 +21,9 @@ static void VAND0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND1q(void)
|
||||
@ -45,9 +34,9 @@ static void VAND1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND0h(void)
|
||||
@ -58,9 +47,9 @@ static void VAND0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND1h(void)
|
||||
@ -71,9 +60,9 @@ static void VAND1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND2h(void)
|
||||
@ -84,9 +73,9 @@ static void VAND2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND3h(void)
|
||||
@ -97,9 +86,9 @@ static void VAND3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND0w(void)
|
||||
@ -110,9 +99,9 @@ static void VAND0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND1w(void)
|
||||
@ -123,9 +112,9 @@ static void VAND1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND2w(void)
|
||||
@ -136,9 +125,9 @@ static void VAND2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND3w(void)
|
||||
@ -149,9 +138,9 @@ static void VAND3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND4w(void)
|
||||
@ -162,9 +151,9 @@ static void VAND4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND5w(void)
|
||||
@ -175,9 +164,9 @@ static void VAND5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND6w(void)
|
||||
@ -188,9 +177,9 @@ static void VAND6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VAND7w(void)
|
||||
@ -201,8 +190,8 @@ static void VAND7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
72
vu/vch.h
72
vu/vch.h
@ -1,45 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VCH(int vd, int vs, int vt, int e)
|
||||
{
|
||||
int ge, le, neq;
|
||||
register int i;
|
||||
|
||||
VCO = 0x0000;
|
||||
VCC = 0x0000;
|
||||
VCE = 0x00;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const signed short VS = VR[vs][i];
|
||||
const signed short VT = VR_T(i);
|
||||
const int sn = (VS ^ VT) < 0; /* sn = (unsigned short)(VS ^ VT) >> 15 */
|
||||
|
||||
if (sn)
|
||||
{
|
||||
ge = (VT < 0);
|
||||
le = (VS + VT <= 0);
|
||||
neq = (VS + VT == -1); /* compare extension */
|
||||
VCE |= neq << i;
|
||||
neq ^= !(VS + VT == 0); /* !(x | y) = x ^ !(y), if (x & y) != 1 */
|
||||
ACC_R(i) = le ? -VT : VS;
|
||||
VCO |= (neq <<= (i + 0x8)) | (sn << (i + 0x0)); /* sn = 1 */
|
||||
}
|
||||
else
|
||||
{
|
||||
le = (VT < 0);
|
||||
ge = (VS - VT >= 0);
|
||||
neq = !(VS - VT == 0);
|
||||
VCE |= 0x00 << i;
|
||||
ACC_R(i) = ge ? VT : VS;
|
||||
VCO |= (neq <<= (i + 0x8)) | (sn << (i + 0x0)); /* sn = 0 */
|
||||
}
|
||||
VCC |= (ge <<= (i + 0x8)) | (le <<= (i + 0x0));
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
void do_ch(int vs)
|
||||
{
|
||||
int eq[8], neq[8], vce[8];
|
||||
@ -73,7 +33,7 @@ void do_ch(int vs)
|
||||
for (i = 0; i < N; i++)
|
||||
ge[i] = sn[i] ? (VC[i] > 0x0000) : (VR[vs][i] >= VC[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (sn[i] ? le[i] : ge[i]) ? VC[i] : VR[vs][i];
|
||||
ACC_L(i) = (sn[i] ? le[i] : ge[i]) ? VC[i] : VR[vs][i];
|
||||
|
||||
VCC = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
@ -98,7 +58,7 @@ static void VCH_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH0q(void)
|
||||
@ -112,7 +72,7 @@ static void VCH0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH1q(void)
|
||||
@ -126,7 +86,7 @@ static void VCH1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH0h(void)
|
||||
@ -140,7 +100,7 @@ static void VCH0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH1h(void)
|
||||
@ -154,7 +114,7 @@ static void VCH1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH2h(void)
|
||||
@ -168,7 +128,7 @@ static void VCH2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH3h(void)
|
||||
@ -182,7 +142,7 @@ static void VCH3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH0w(void)
|
||||
@ -196,7 +156,7 @@ static void VCH0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH1w(void)
|
||||
@ -210,7 +170,7 @@ static void VCH1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH2w(void)
|
||||
@ -224,7 +184,7 @@ static void VCH2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH3w(void)
|
||||
@ -238,7 +198,7 @@ static void VCH3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH4w(void)
|
||||
@ -252,7 +212,7 @@ static void VCH4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH5w(void)
|
||||
@ -266,7 +226,7 @@ static void VCH5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH6w(void)
|
||||
@ -280,7 +240,7 @@ static void VCH6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCH7w(void)
|
||||
@ -294,6 +254,6 @@ static void VCH7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_ch(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
80
vu/vcl.h
80
vu/vcl.h
@ -1,53 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VCL(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register const unsigned short VCC_old = VCC;
|
||||
int ge, le;
|
||||
register int i;
|
||||
|
||||
VCC = 0x0000; /* Undergo the correction phase, factoring old VCC bits. */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const unsigned short VS = (unsigned short)VR[vs][i];
|
||||
const unsigned short VT = (unsigned short)VR_T(i);
|
||||
const int eq = (~VCO >> (i + 0x8)) & 0x0001; /* !(NOTEQUAL) */
|
||||
const int sn = (VCO >> (i + 0x0)) & 0x0001; /* CARRY */
|
||||
|
||||
le = VCC_old & (0x0001 << i); /* unless (eq & sn) */
|
||||
ge = VCC_old & (0x0100 << i); /* unless (eq & !sn) */
|
||||
if (sn)
|
||||
{
|
||||
if (eq)
|
||||
{
|
||||
const int sum = VS + VT;
|
||||
const int ce = (VCE >> i) & 0x01;
|
||||
int lz = ((sum & 0x0000FFFF) == 0x00000000);
|
||||
int uz = ((sum & 0xFFFF0000) == 0x00000000); /* !carryout */
|
||||
|
||||
le = (~ce & (lz & uz)) | (ce & (lz | uz));
|
||||
le <<= i + 0x0;
|
||||
}
|
||||
ACC_R(i) = le ? -VT : VS;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (eq)
|
||||
{
|
||||
ge = (VS - VT >= 0);
|
||||
ge <<= i + 0x8;
|
||||
}
|
||||
ACC_R(i) = ge ? VT : VS;
|
||||
}
|
||||
VCC |= ge | le;
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
VCO = 0x0000;
|
||||
VCE = 0x00;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_cl(int vs)
|
||||
{
|
||||
int eq[8], vce[8];
|
||||
@ -105,7 +57,7 @@ void do_cl(int vs)
|
||||
for (i = 0; i < N; i++)
|
||||
eq[i] = sn[i] ? le[i] : ge[i];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = eq[i] ? VC[i] : VR[vs][i];
|
||||
ACC_L(i) = eq[i] ? VC[i] : VR[vs][i];
|
||||
|
||||
VCC = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
@ -126,7 +78,7 @@ static void VCL_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL0q(void)
|
||||
@ -140,7 +92,7 @@ static void VCL0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL1q(void)
|
||||
@ -154,7 +106,7 @@ static void VCL1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL0h(void)
|
||||
@ -168,7 +120,7 @@ static void VCL0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL1h(void)
|
||||
@ -182,7 +134,7 @@ static void VCL1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL2h(void)
|
||||
@ -196,7 +148,7 @@ static void VCL2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL3h(void)
|
||||
@ -210,7 +162,7 @@ static void VCL3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL0w(void)
|
||||
@ -224,7 +176,7 @@ static void VCL0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL1w(void)
|
||||
@ -238,7 +190,7 @@ static void VCL1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL2w(void)
|
||||
@ -252,7 +204,7 @@ static void VCL2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL3w(void)
|
||||
@ -266,7 +218,7 @@ static void VCL3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL4w(void)
|
||||
@ -280,7 +232,7 @@ static void VCL4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL5w(void)
|
||||
@ -294,7 +246,7 @@ static void VCL5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL6w(void)
|
||||
@ -308,7 +260,7 @@ static void VCL6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCL7w(void)
|
||||
@ -322,6 +274,6 @@ static void VCL7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_cl(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
65
vu/vcr.h
65
vu/vcr.h
@ -1,38 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VCR(int vd, int vs, int vt, int e)
|
||||
{
|
||||
int ge, le;
|
||||
register int i;
|
||||
|
||||
VCC = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const signed short VS = VR[vs][i];
|
||||
const signed short VT = VR_T(i);
|
||||
const int sn = (VS ^ VT) < 0; /* sn = (unsigned short)(VS ^ VT) >> 15 */
|
||||
|
||||
if (sn)
|
||||
{
|
||||
ge = (VT < 0); /* -VT > -0; (-VT - 1) > -1; (~VT) >= 0 */
|
||||
le = (VS + VT + 1 <= 0); /* VS + VT < 0; VS < -VT: "VS <= ~VT" */
|
||||
ACC_R(i) = le ? ~VT : VS;
|
||||
}
|
||||
else
|
||||
{
|
||||
le = (VT < 0);
|
||||
ge = (VS - VT >= 0); /* VS - VT + 1 > 0; VS > VT - 1: "VS >= VT" */
|
||||
ACC_R(i) = le ? VT : VS;
|
||||
}
|
||||
VCC |= (ge <<= (i + 8)) | (le <<= (i + 0));
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
VCO = 0x0000;
|
||||
VCE = 0x00;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_cr(int vs)
|
||||
{
|
||||
int ge[8], le[8];
|
||||
@ -62,7 +29,7 @@ void do_cr(int vs)
|
||||
for (i = 0; i < N; i++)
|
||||
VC[i] ^= sn[i]; /* if (sn == ~0) {VT = ~VT;} else {VT = VT;} */
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = le[i] ? VC[i] : VR[vs][i];
|
||||
ACC_L(i) = le[i] ? VC[i] : VR[vs][i];
|
||||
#if (0)
|
||||
VCC = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
@ -92,7 +59,7 @@ static void VCR_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR0q(void)
|
||||
@ -106,7 +73,7 @@ static void VCR0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR1q(void)
|
||||
@ -120,7 +87,7 @@ static void VCR1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR0h(void)
|
||||
@ -134,7 +101,7 @@ static void VCR0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR1h(void)
|
||||
@ -148,7 +115,7 @@ static void VCR1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR2h(void)
|
||||
@ -162,7 +129,7 @@ static void VCR2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR3h(void)
|
||||
@ -176,7 +143,7 @@ static void VCR3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR0w(void)
|
||||
@ -190,7 +157,7 @@ static void VCR0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR1w(void)
|
||||
@ -204,7 +171,7 @@ static void VCR1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR2w(void)
|
||||
@ -218,7 +185,7 @@ static void VCR2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR3w(void)
|
||||
@ -232,7 +199,7 @@ static void VCR3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR4w(void)
|
||||
@ -246,7 +213,7 @@ static void VCR4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR5w(void)
|
||||
@ -260,7 +227,7 @@ static void VCR5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR6w(void)
|
||||
@ -274,7 +241,7 @@ static void VCR6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VCR7w(void)
|
||||
@ -288,6 +255,6 @@ static void VCR7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_cr(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
58
vu/veq.h
58
vu/veq.h
@ -1,29 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VEQ(int vd, int vs, int vt, int e)
|
||||
{
|
||||
int eq; /* equal, unless (NOTEQUAL) */
|
||||
register unsigned char VCO_VCE;
|
||||
register int i;
|
||||
|
||||
VCC = 0x0000;
|
||||
VCO_VCE = ~(unsigned char)(VCO >> 8);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const signed short VS = VR[vs][i];
|
||||
const signed short VT = VR_T(i);
|
||||
|
||||
eq = (VCO_VCE >> i) & 0x01;
|
||||
eq &= (VS == VT);
|
||||
VCC |= eq <<= i;
|
||||
ACC_R(i) = VT; /* More accurately, `ACC_R(i) = eq ? VS : VT`. */
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
VCO = 0x0000;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_eq(int vs)
|
||||
{
|
||||
int eq[8];
|
||||
@ -42,10 +18,10 @@ void do_eq(int vs)
|
||||
VCC |= 0 << (i + 0x8);
|
||||
#if (0)
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = eq[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
|
||||
ACC_L(i) = eq[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
|
||||
#else
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VC[i];
|
||||
ACC_L(i) = VC[i];
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
@ -61,7 +37,7 @@ static void VEQ_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ0q(void)
|
||||
@ -75,7 +51,7 @@ static void VEQ0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ1q(void)
|
||||
@ -89,7 +65,7 @@ static void VEQ1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ0h(void)
|
||||
@ -103,7 +79,7 @@ static void VEQ0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ1h(void)
|
||||
@ -117,7 +93,7 @@ static void VEQ1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ2h(void)
|
||||
@ -131,7 +107,7 @@ static void VEQ2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ3h(void)
|
||||
@ -145,7 +121,7 @@ static void VEQ3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ0w(void)
|
||||
@ -159,7 +135,7 @@ static void VEQ0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ1w(void)
|
||||
@ -173,7 +149,7 @@ static void VEQ1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ2w(void)
|
||||
@ -187,7 +163,7 @@ static void VEQ2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ3w(void)
|
||||
@ -201,7 +177,7 @@ static void VEQ3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ4w(void)
|
||||
@ -215,7 +191,7 @@ static void VEQ4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ5w(void)
|
||||
@ -229,7 +205,7 @@ static void VEQ5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ6w(void)
|
||||
@ -243,7 +219,7 @@ static void VEQ6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VEQ7w(void)
|
||||
@ -257,6 +233,6 @@ static void VEQ7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_eq(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
57
vu/vge.h
57
vu/vge.h
@ -1,30 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VGE(int vd, int vs, int vt, int e)
|
||||
{
|
||||
int ge; /* greater than or, unless (CARRY && NOTEQUAL), equal */
|
||||
register unsigned char VCO_VCE;
|
||||
register int i;
|
||||
|
||||
VCC = 0x0000;
|
||||
VCO_VCE = ~(unsigned char)(VCO >> 8);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const signed short VS = VR[vs][i];
|
||||
const signed short VT = VR_T(i);
|
||||
|
||||
ge = ((~VCO >> i) & 0x0001) | ((VCO_VCE >> i) & 0x01);
|
||||
ge &= (VS == VT);
|
||||
ge |= (VS > VT);
|
||||
VCC |= ge <<= i;
|
||||
ACC_R(i) = ge ? VS : VT;
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
VCO = 0x0000;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_ge(int vs)
|
||||
{
|
||||
int ge[8];
|
||||
@ -50,7 +25,7 @@ void do_ge(int vs)
|
||||
for (i = 0; i < N; i++)
|
||||
VCC |= 0 << (i + 0x8);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ge[i] ? VR[vs][i] : VC[i];
|
||||
ACC_L(i) = ge[i] ? VR[vs][i] : VC[i];
|
||||
return;
|
||||
}
|
||||
|
||||
@ -65,7 +40,7 @@ static void VGE_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE0q(void)
|
||||
@ -79,7 +54,7 @@ static void VGE0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE1q(void)
|
||||
@ -93,7 +68,7 @@ static void VGE1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE0h(void)
|
||||
@ -107,7 +82,7 @@ static void VGE0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE1h(void)
|
||||
@ -121,7 +96,7 @@ static void VGE1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE2h(void)
|
||||
@ -135,7 +110,7 @@ static void VGE2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE3h(void)
|
||||
@ -149,7 +124,7 @@ static void VGE3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE0w(void)
|
||||
@ -163,7 +138,7 @@ static void VGE0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE1w(void)
|
||||
@ -177,7 +152,7 @@ static void VGE1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE2w(void)
|
||||
@ -191,7 +166,7 @@ static void VGE2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE3w(void)
|
||||
@ -205,7 +180,7 @@ static void VGE3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE4w(void)
|
||||
@ -219,7 +194,7 @@ static void VGE4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE5w(void)
|
||||
@ -233,7 +208,7 @@ static void VGE5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE6w(void)
|
||||
@ -247,7 +222,7 @@ static void VGE6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VGE7w(void)
|
||||
@ -261,6 +236,6 @@ static void VGE7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_ge(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
57
vu/vlt.h
57
vu/vlt.h
@ -1,30 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VLT(int vd, int vs, int vt, int e)
|
||||
{
|
||||
int lt; /* less than, or if (CARRY && NOTEQUAL), equal */
|
||||
register unsigned char VCO_VCE;
|
||||
register int i;
|
||||
|
||||
VCC = 0x0000;
|
||||
VCO_VCE = ~(unsigned char)(VCO >> 8);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const signed short VS = VR[vs][i];
|
||||
const signed short VT = VR_T(i);
|
||||
|
||||
lt = ((VCO >> i) & 0x0001) & ((~VCO_VCE >> i) & 0x01);
|
||||
lt &= (VS == VT);
|
||||
lt |= (VS < VT);
|
||||
VCC |= lt <<= i;
|
||||
ACC_R(i) = lt ? VS : VT;
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
VCO = 0x0000;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_lt(int vs)
|
||||
{
|
||||
int lt[8];
|
||||
@ -49,7 +24,7 @@ void do_lt(int vs)
|
||||
for (i = 0; i < N; i++)
|
||||
VCC |= 0 << (i + 0x8);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = lt[i] ? VR[vs][i] : VC[i];
|
||||
ACC_L(i) = lt[i] ? VR[vs][i] : VC[i];
|
||||
return;
|
||||
}
|
||||
|
||||
@ -64,7 +39,7 @@ static void VLT_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT0q(void)
|
||||
@ -78,7 +53,7 @@ static void VLT0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT1q(void)
|
||||
@ -92,7 +67,7 @@ static void VLT1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT0h(void)
|
||||
@ -106,7 +81,7 @@ static void VLT0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT1h(void)
|
||||
@ -120,7 +95,7 @@ static void VLT1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT2h(void)
|
||||
@ -134,7 +109,7 @@ static void VLT2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT3h(void)
|
||||
@ -148,7 +123,7 @@ static void VLT3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT0w(void)
|
||||
@ -162,7 +137,7 @@ static void VLT0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT1w(void)
|
||||
@ -176,7 +151,7 @@ static void VLT1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT2w(void)
|
||||
@ -190,7 +165,7 @@ static void VLT2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT3w(void)
|
||||
@ -204,7 +179,7 @@ static void VLT3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT4w(void)
|
||||
@ -218,7 +193,7 @@ static void VLT4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT5w(void)
|
||||
@ -232,7 +207,7 @@ static void VLT5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT6w(void)
|
||||
@ -246,7 +221,7 @@ static void VLT6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VLT7w(void)
|
||||
@ -260,6 +235,6 @@ static void VLT7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_lt(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
83
vu/vmacf.h
83
vu/vmacf.h
@ -1,192 +1,205 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMACF(int vd, int vs, int vt, int e)
|
||||
INLINE void do_macf(short* VD, short* VS, short* VT)
|
||||
{
|
||||
INT64 acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += VR[vs][i]*VR_T(i) << 1;
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
|
||||
acc[i] = (VS[i]*VT[i]) << 1;
|
||||
do_acc(acc);
|
||||
SIGNED_CLAMP(VD, SM_MUL_X);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMACF_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
do_macf(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMACF0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACF7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_macf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
26
vu/vmacq.h
26
vu/vmacq.h
@ -1,31 +1,7 @@
|
||||
#include "vu.h"
|
||||
|
||||
/*
 * VMACQ is not emulated.  This handler only reports the op-code through
 * message(); the function itself reads and writes no vector register or
 * accumulator state.
 */
static void VMACQ(void)
{
    message("VMACQ\nUnimplemented.", 3); /* untested, any N64 ROMs use this?? */
    return;
}
|
||||
|
93
vu/vmacu.h
93
vu/vmacu.h
@ -1,6 +1,6 @@
|
||||
#include "vu.h"
|
||||
|
||||
void UNSIGNED_CLAMP(int vd)
|
||||
INLINE void UNSIGNED_CLAMP(short* VD)
|
||||
{
|
||||
register int i;
|
||||
|
||||
@ -9,202 +9,215 @@ void UNSIGNED_CLAMP(int vd)
|
||||
register signed short result;
|
||||
register short int tmp;
|
||||
|
||||
result = VACC[i].s[MD]; /* raw slice before clamping */
|
||||
tmp = (signed short)(VACC[i].DW >> 31) != 0x0000;
|
||||
result = ACC_M(i); /* raw slice before clamping */
|
||||
tmp = (((ACC_H(i) << 1) | !!(ACC_M(i) & 0x8000)) != 0x0000);
|
||||
result |= -tmp; /* slice overflow */
|
||||
tmp = VACC[i].s[HI] >> 15; /* Zero- or one-extend. */
|
||||
tmp = ACC_H(i) >> 15; /* Zero- or one-extend. */
|
||||
result &= ~tmp; /* slice underflow */
|
||||
VR[vd][i] = result;
|
||||
VD[i] = result;
|
||||
}
|
||||
}
|
||||
|
||||
static void VMACU(int vd, int vs, int vt, int e)
|
||||
INLINE void do_macu(short* VD, short* VS, short* VT)
|
||||
{
|
||||
INT64 acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += VR[vs][i]*VR_T(i) << 1;
|
||||
UNSIGNED_CLAMP(vd);
|
||||
acc[i] = (VS[i]*VT[i]) << 1;
|
||||
do_acc(acc);
|
||||
UNSIGNED_CLAMP(VD);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMACU_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][i];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
do_macu(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMACU0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMACU7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
UNSIGNED_CLAMP(vd);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_macu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
119
vu/vmadh.h
119
vu/vmadh.h
@ -1,226 +1,207 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMADH(int vd, int vs, int vt, int e)
|
||||
INLINE void do_madh(short* VD, signed short* VS, signed short* VT)
|
||||
{
|
||||
register signed long long product;
|
||||
INT64 acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
product = VR[vs][i] * VR_T(i);
|
||||
VACC[i].DW += product << 16;
|
||||
}
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
|
||||
acc[i] = VS[i] * VT[i];
|
||||
for (i = 0; i < N; i++)
|
||||
acc[i] = acc[i] << 16;
|
||||
do_acc(acc);
|
||||
SIGNED_CLAMP(VD, SM_MUL_X);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMADH_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
do_madh(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMADH0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADH7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (INT64)(result[i]) << 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_madh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
149
vu/vmadl.h
149
vu/vmadl.h
@ -1,256 +1,207 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMADL(int vd, int vs, int vt, int e)
|
||||
INLINE void do_madl(short* VD, unsigned short* VS, unsigned short* VT)
|
||||
{
|
||||
register unsigned int product;
|
||||
INT64 acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
product = (unsigned short)VR[vs][i] * (unsigned short)VR_T(i);
|
||||
VACC[i].DW += product >> 16;
|
||||
}
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_MUL_Z);
|
||||
acc[i] = VS[i] * VT[i];
|
||||
for (i = 0; i < N; i++)
|
||||
acc[i] = acc[i] >> 16;
|
||||
do_acc(acc);
|
||||
SIGNED_CLAMP(VD, SM_MUL_Z);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMADL_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
do_madl(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMADL0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADL7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += MUDL_acc[i].H[1];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_madl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
141
vu/vmadm.h
141
vu/vmadm.h
@ -1,250 +1,205 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMADM(int vd, int vs, int vt, int e)
|
||||
INLINE void do_madm(short* VD, signed short* VS, unsigned short* VT)
|
||||
{
|
||||
INT64 acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += VR[vs][i] * (unsigned short)VR_T(i);
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
|
||||
acc[i] = VS[i] * VT[i];
|
||||
do_acc(acc);
|
||||
SIGNED_CLAMP(VD, SM_MUL_X);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMADM_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i] * (unsigned short)(VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
do_madm(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMADM0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADM7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
VR[vs][i]
|
||||
* (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_madm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
141
vu/vmadn.h
141
vu/vmadn.h
@ -1,250 +1,205 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMADN(int vd, int vs, int vt, int e)
|
||||
INLINE void do_madn(short* VD, unsigned short* VS, signed short* VT)
|
||||
{
|
||||
INT64 acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += (unsigned short)VR[vs][i] * VR_T(i);
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_MUL_Z);
|
||||
acc[i] = VS[i] * VT[i];
|
||||
do_acc(acc);
|
||||
SIGNED_CLAMP(VD, SM_MUL_Z);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMADN_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = (unsigned short)(VR[vs][i]) * VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
do_madn(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMADN0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMADN7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW += result[i];
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_madn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
75
vu/vmov.h
75
vu/vmov.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMOV(int vd, int de, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
/* MovIn = (int)VR[vt][e & 07]; */
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR_T(i);
|
||||
VR_D(de &= 07) = VACC[de].s[LO];
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMOVv0(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,8 +8,8 @@ static void VMOVv0(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = VACC[00].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = ACC_L(00);
|
||||
return;
|
||||
}
|
||||
static void VMOVv1(void)
|
||||
@ -31,8 +20,8 @@ static void VMOVv1(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = VACC[01].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = ACC_L(01);
|
||||
return;
|
||||
}
|
||||
static void VMOV0q(void)
|
||||
@ -43,8 +32,8 @@ static void VMOV0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = VACC[02].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = ACC_L(02);
|
||||
return;
|
||||
}
|
||||
static void VMOV1q(void)
|
||||
@ -55,8 +44,8 @@ static void VMOV1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = VACC[03].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = ACC_L(03);
|
||||
return;
|
||||
}
|
||||
static void VMOV0h(void)
|
||||
@ -67,8 +56,8 @@ static void VMOV0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = VACC[04].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = ACC_L(04);
|
||||
return;
|
||||
}
|
||||
static void VMOV1h(void)
|
||||
@ -79,8 +68,8 @@ static void VMOV1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = VACC[05].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = ACC_L(05);
|
||||
return;
|
||||
}
|
||||
static void VMOV2h(void)
|
||||
@ -91,8 +80,8 @@ static void VMOV2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = VACC[06].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = ACC_L(06);
|
||||
return;
|
||||
}
|
||||
static void VMOV3h(void)
|
||||
@ -103,8 +92,8 @@ static void VMOV3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = VACC[07].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = ACC_L(07);
|
||||
return;
|
||||
}
|
||||
static void VMOV0w(void)
|
||||
@ -115,8 +104,8 @@ static void VMOV0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[00].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(00);
|
||||
return;
|
||||
}
|
||||
static void VMOV1w(void)
|
||||
@ -127,8 +116,8 @@ static void VMOV1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[01].s[LO];
|
||||
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(01);
|
||||
return;
|
||||
}
|
||||
static void VMOV2w(void)
|
||||
@ -139,8 +128,8 @@ static void VMOV2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[02].s[LO];
|
||||
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(02);
|
||||
return;
|
||||
}
|
||||
static void VMOV3w(void)
|
||||
@ -151,8 +140,8 @@ static void VMOV3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[03].s[LO];
|
||||
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(03);
|
||||
return;
|
||||
}
|
||||
static void VMOV4w(void)
|
||||
@ -163,8 +152,8 @@ static void VMOV4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[04].s[LO];
|
||||
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(04);
|
||||
return;
|
||||
}
|
||||
static void VMOV5w(void)
|
||||
@ -175,8 +164,8 @@ static void VMOV5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[05].s[LO];
|
||||
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(05);
|
||||
return;
|
||||
}
|
||||
static void VMOV6w(void)
|
||||
@ -187,8 +176,8 @@ static void VMOV6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[06].s[LO];
|
||||
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(06);
|
||||
return;
|
||||
}
|
||||
static void VMOV7w(void)
|
||||
@ -199,7 +188,7 @@ static void VMOV7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = VACC[07].s[LO];
|
||||
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = ACC_L(07);
|
||||
return;
|
||||
}
|
||||
|
71
vu/vmrg.h
71
vu/vmrg.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMRG(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = VCC & (0x0001 << i) ? VR[vs][i] : VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
void do_mrg(void)
|
||||
{
|
||||
int cmp[8];
|
||||
@ -34,9 +23,9 @@ static void VMRG_v(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][i];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG0q(void)
|
||||
@ -48,9 +37,9 @@ static void VMRG0q(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG1q(void)
|
||||
@ -62,9 +51,9 @@ static void VMRG1q(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG0h(void)
|
||||
@ -76,9 +65,9 @@ static void VMRG0h(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG1h(void)
|
||||
@ -90,9 +79,9 @@ static void VMRG1h(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG2h(void)
|
||||
@ -104,9 +93,9 @@ static void VMRG2h(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG3h(void)
|
||||
@ -118,9 +107,9 @@ static void VMRG3h(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG0w(void)
|
||||
@ -132,9 +121,9 @@ static void VMRG0w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG1w(void)
|
||||
@ -146,9 +135,9 @@ static void VMRG1w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG2w(void)
|
||||
@ -160,9 +149,9 @@ static void VMRG2w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG3w(void)
|
||||
@ -174,9 +163,9 @@ static void VMRG3w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG4w(void)
|
||||
@ -188,9 +177,9 @@ static void VMRG4w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG5w(void)
|
||||
@ -202,9 +191,9 @@ static void VMRG5w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG6w(void)
|
||||
@ -216,9 +205,9 @@ static void VMRG6w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VMRG7w(void)
|
||||
@ -230,8 +219,8 @@ static void VMRG7w(void)
|
||||
|
||||
do_mrg();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
121
vu/vmudh.h
121
vu/vmudh.h
@ -1,225 +1,210 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMUDH(int vd, int vs, int vt, int e)
|
||||
INLINE void do_mudh(short* VD, signed short* VS, signed short* VT)
|
||||
{
|
||||
long acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
VACC[i].DW = VR[vs][i] * VR_T(i);
|
||||
VACC[i].DW <<= 16;
|
||||
}
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
|
||||
acc[i] = VS[i] * VT[i];
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_H(i) = (acc[i] >> 16);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_M(i) = (short)(acc[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_L(i) = 0x0000;
|
||||
SIGNED_CLAMP(VD, SM_MUL_X);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMUDH_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
do_mudh(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMUDH0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDH7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW <<= 16;
|
||||
SIGNED_CLAMP(VR[vd], SM_MUL_X);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_mudh(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
171
vu/vmudl.h
171
vu/vmudl.h
@ -1,272 +1,213 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMUDL(int vd, int vs, int vt, int e)
|
||||
INLINE void do_mudl(short* VD, unsigned short* VS, unsigned short* VT)
|
||||
{
|
||||
register unsigned int product;
|
||||
long acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
product = (unsigned short)VR[vs][i] * (unsigned short)VR_T(i);
|
||||
VACC[i].DW = product >> 16;
|
||||
}
|
||||
for (i = 0; i < N; i++) /* Sign-clamp bits 15..0 of ACC to dest. VR. */
|
||||
VR_D(i) = VACC[i].s[LO]; /* No arithmetic checks needed. */
|
||||
acc[i] = VS[i] * VT[i];
|
||||
for (i = 0; i < N; i++)
|
||||
acc[i] = acc[i] >> 16;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_H(i) = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_M(i) = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_L(i) = acc[i];
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] = ACC_L(i); /* no possibilities to clamp */
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMUDL_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
do_mudl(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMUDL0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDL7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
MUDL_acc[i].W =
|
||||
(unsigned short)(VR[vs][i])
|
||||
* (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = MUDL_acc[i].H[1];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = MUDL_acc[i].H[1];
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_mudl(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
105
vu/vmudm.h
105
vu/vmudm.h
@ -1,208 +1,211 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMUDM(int vd, int vs, int vt, int e)
|
||||
INLINE void do_mudm(short* VD, signed short* VS, unsigned short* VT)
|
||||
{
|
||||
long acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR_T(i);
|
||||
for (i = 0; i < N; i++) /* Sign-clamp bits 31..16 of ACC to dest. VR. */
|
||||
VR_D(i) = VACC[i].s[MD]; /* No saturate checks needed. */
|
||||
acc[i] = VS[i] * VT[i];
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_H(i) = VS[i] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_M(i) = (acc[i] >> 16);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_L(i) = acc[i];
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] = ACC_M(i); /* no possibilities to clamp */
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMUDM_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)(VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
do_mudm(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMUDM0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDM7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = VR[vs][i] * (unsigned short)VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_mudm(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
105
vu/vmudn.h
105
vu/vmudn.h
@ -1,208 +1,211 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMUDN(int vd, int vs, int vt, int e)
|
||||
INLINE void do_mudn(short* VD, unsigned short* VS, signed short* VT)
|
||||
{
|
||||
long acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR_T(i);
|
||||
for (i = 0; i < N; i++) /* Sign-clamp bits 15..0 of ACC to dest. VR. */
|
||||
VR_D(i) = VACC[i].s[LO]; /* No arithmetic checks needed. */
|
||||
acc[i] = VS[i] * VT[i];
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_H(i) = VT[i] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_M(i) = (acc[i] >> 16);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_L(i) = acc[i];
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] = ACC_L(i); /* no possibilities to clamp */
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMUDN_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)(VR[vs][i]) * VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
do_mudn(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMUDN0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMUDN7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (unsigned short)VR[vs][i] * VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_mudn(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
139
vu/vmulf.h
139
vu/vmulf.h
@ -1,240 +1,215 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMULF(int vd, int vs, int vt, int e)
|
||||
INLINE void do_mulf(short* VD, short* VS, short* VT)
|
||||
{
|
||||
long acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (VR[vs][i]*VR_T(i) << 1) + 0x8000;
|
||||
acc[i] = (VS[i]*VT[i]) << 1;
|
||||
for (i = 0; i < N; i++)
|
||||
VR_D(i) = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++) /* Only one reachable value can expose overflow. */
|
||||
VR_D(i) -= !!(VR_D(i) & 0x8000);
|
||||
acc[i] = acc[i] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_H(i) = (VS[i] ^ VT[i]) >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_M(i) = (short)(acc[i] >> 16);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_L(i) = (short)(acc[i] >> 0);
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] = ACC_M(i);
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] = VD[i] - !!(VD[i] & 0x8000); /* only possible product to clamp */
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMULF_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][i] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
do_mulf(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMULF0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x2 & 01) + (i & 0xE)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x3 & 01) + (i & 0xE)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x4 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x5 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x6 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x7 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x8 & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x9 & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULF7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] -= !!(VR[vd][i] & 0x8000);
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_mulf(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
175
vu/vmulu.h
175
vu/vmulu.h
@ -1,272 +1,217 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VMULU(int vd, int vs, int vt, int e)
|
||||
INLINE void do_mulu(short* VD, short* VS, short* VT)
|
||||
{
|
||||
long acc[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = (VR[vs][i]*VR_T(i) << 1) + 0x8000;
|
||||
for (i = 0; i < N; i++) /* Zero-clamp bits 31..16 of ACC to dest. VR. */
|
||||
{
|
||||
VR_D(i) = VACC[i].s[MD]; /* VD = ACC[31..16] */
|
||||
VR_D(i) |= VR_D(i) >> 15; /* VD |= -(result == 0x80008000) */
|
||||
VR_D(i) &= ~VACC[i].HW[03]; /* VD = (ACC < 0) ? 0 : ACC[31..16]; */
|
||||
}
|
||||
acc[i] = (VS[i]*VT[i]) << 1;
|
||||
for (i = 0; i < N; i++)
|
||||
acc[i] = acc[i] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_H(i) = (VS[i] ^ VT[i]) >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_M(i) = (short)(acc[i] >> 16);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_L(i) = (short)(acc[i] >> 0);
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] = ACC_M(i);
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] |= ACC_M(i) >> 15; /* VD |= -(result == 0x000080008000) */
|
||||
for (i = 0; i < N; i++)
|
||||
VD[i] &= ~ACC_H(i); /* VD &= -(result >= 0x000000000000) */
|
||||
return;
|
||||
}
|
||||
|
||||
static void VMULU_v(void)
|
||||
{
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][i] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
do_mulu(VR[vd], VR[vs], VR[vt]);
|
||||
return;
|
||||
}
|
||||
static void VMULU0q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x2 & 01) + (i & 0xE)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU1q(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x3 & 01) + (i & 0xE)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU0h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x4 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU1h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x5 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU2h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x6 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU3h(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x7 & 03) + (i & 0xC)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU0w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x8 & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU1w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0x9 & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU2w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU3w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU4w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU5w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU6w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
static void VMULU7w(void)
|
||||
{
|
||||
short SV[N];
|
||||
register int i;
|
||||
const int vd = inst.R.sa;
|
||||
const int vs = inst.R.rd;
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)] + 0x8000;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] |= VACC[i].s[MD] >> 15;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] &= VACC[i].s[HI] >> 15;
|
||||
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
do_mulu(VR[vd], VR[vs], SV);
|
||||
return;
|
||||
}
|
||||
|
71
vu/vnand.h
71
vu/vnand.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VNAND(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = ~(VR[vs][i] & VR_T(i));
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VNAND_v(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,9 +8,9 @@ static void VNAND_v(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][i]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND0q(void)
|
||||
@ -32,9 +21,9 @@ static void VNAND0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND1q(void)
|
||||
@ -45,9 +34,9 @@ static void VNAND1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND0h(void)
|
||||
@ -58,9 +47,9 @@ static void VNAND0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND1h(void)
|
||||
@ -71,9 +60,9 @@ static void VNAND1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND2h(void)
|
||||
@ -84,9 +73,9 @@ static void VNAND2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND3h(void)
|
||||
@ -97,9 +86,9 @@ static void VNAND3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND0w(void)
|
||||
@ -110,9 +99,9 @@ static void VNAND0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND1w(void)
|
||||
@ -123,9 +112,9 @@ static void VNAND1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND2w(void)
|
||||
@ -136,9 +125,9 @@ static void VNAND2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND3w(void)
|
||||
@ -149,9 +138,9 @@ static void VNAND3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND4w(void)
|
||||
@ -162,9 +151,9 @@ static void VNAND4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND5w(void)
|
||||
@ -175,9 +164,9 @@ static void VNAND5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND6w(void)
|
||||
@ -188,9 +177,9 @@ static void VNAND6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNAND7w(void)
|
||||
@ -201,8 +190,8 @@ static void VNAND7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
58
vu/vne.h
58
vu/vne.h
@ -1,29 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VNE(int vd, int vs, int vt, int e)
|
||||
{
|
||||
int ne; /* not equal or, unless !(NOTEQUAL), equal */
|
||||
register unsigned char VCO_VCE;
|
||||
register int i;
|
||||
|
||||
VCC = 0x0000;
|
||||
VCO_VCE = ~(unsigned char)(VCO >> 8);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
const signed short VS = VR[vs][i];
|
||||
const signed short VT = VR_T(i);
|
||||
|
||||
ne = (~VCO_VCE >> i) & 0x01;
|
||||
ne |= (VS != VT);
|
||||
VCC |= ne <<= i;
|
||||
ACC_R(i) = VS; /* More accurately, `ACC_R(i) = ne ? VS : VT`. */
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
VCO = 0x0000;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_ne(int vs)
|
||||
{
|
||||
int ne[8];
|
||||
@ -41,10 +17,10 @@ void do_ne(int vs)
|
||||
VCC |= 0 << (i + 0x8);
|
||||
#if (0)
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ne[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
|
||||
ACC_L(i) = ne[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
|
||||
#else
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i];
|
||||
ACC_L(i) = VR[vs][i];
|
||||
#endif
|
||||
return;
|
||||
}
|
||||
@ -60,7 +36,7 @@ static void VNE_v(void)
|
||||
VC[i] = VR[vt][i];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE0q(void)
|
||||
@ -74,7 +50,7 @@ static void VNE0q(void)
|
||||
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE1q(void)
|
||||
@ -88,7 +64,7 @@ static void VNE1q(void)
|
||||
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE0h(void)
|
||||
@ -102,7 +78,7 @@ static void VNE0h(void)
|
||||
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE1h(void)
|
||||
@ -116,7 +92,7 @@ static void VNE1h(void)
|
||||
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE2h(void)
|
||||
@ -130,7 +106,7 @@ static void VNE2h(void)
|
||||
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE3h(void)
|
||||
@ -144,7 +120,7 @@ static void VNE3h(void)
|
||||
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE0w(void)
|
||||
@ -158,7 +134,7 @@ static void VNE0w(void)
|
||||
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE1w(void)
|
||||
@ -172,7 +148,7 @@ static void VNE1w(void)
|
||||
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE2w(void)
|
||||
@ -186,7 +162,7 @@ static void VNE2w(void)
|
||||
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE3w(void)
|
||||
@ -200,7 +176,7 @@ static void VNE3w(void)
|
||||
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE4w(void)
|
||||
@ -214,7 +190,7 @@ static void VNE4w(void)
|
||||
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE5w(void)
|
||||
@ -228,7 +204,7 @@ static void VNE5w(void)
|
||||
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE6w(void)
|
||||
@ -242,7 +218,7 @@ static void VNE6w(void)
|
||||
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNE7w(void)
|
||||
@ -256,6 +232,6 @@ static void VNE7w(void)
|
||||
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
do_ne(vs);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
71
vu/vnor.h
71
vu/vnor.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VNOR(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = ~(VR[vs][i] | VR_T(i));
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VNOR_v(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,9 +8,9 @@ static void VNOR_v(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][i]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR0q(void)
|
||||
@ -32,9 +21,9 @@ static void VNOR0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR1q(void)
|
||||
@ -45,9 +34,9 @@ static void VNOR1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR0h(void)
|
||||
@ -58,9 +47,9 @@ static void VNOR0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR1h(void)
|
||||
@ -71,9 +60,9 @@ static void VNOR1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR2h(void)
|
||||
@ -84,9 +73,9 @@ static void VNOR2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR3h(void)
|
||||
@ -97,9 +86,9 @@ static void VNOR3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR0w(void)
|
||||
@ -110,9 +99,9 @@ static void VNOR0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR1w(void)
|
||||
@ -123,9 +112,9 @@ static void VNOR1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR2w(void)
|
||||
@ -136,9 +125,9 @@ static void VNOR2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR3w(void)
|
||||
@ -149,9 +138,9 @@ static void VNOR3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR4w(void)
|
||||
@ -162,9 +151,9 @@ static void VNOR4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR5w(void)
|
||||
@ -175,9 +164,9 @@ static void VNOR5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR6w(void)
|
||||
@ -188,9 +177,9 @@ static void VNOR6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNOR7w(void)
|
||||
@ -201,8 +190,8 @@ static void VNOR7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
71
vu/vnxor.h
71
vu/vnxor.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VNXOR(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = ~(VR[vs][i] ^ VR_T(i));
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VNXOR_v(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,9 +8,9 @@ static void VNXOR_v(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][i]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR0q(void)
|
||||
@ -32,9 +21,9 @@ static void VNXOR0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR1q(void)
|
||||
@ -45,9 +34,9 @@ static void VNXOR1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR0h(void)
|
||||
@ -58,9 +47,9 @@ static void VNXOR0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR1h(void)
|
||||
@ -71,9 +60,9 @@ static void VNXOR1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR2h(void)
|
||||
@ -84,9 +73,9 @@ static void VNXOR2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR3h(void)
|
||||
@ -97,9 +86,9 @@ static void VNXOR3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR0w(void)
|
||||
@ -110,9 +99,9 @@ static void VNXOR0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR1w(void)
|
||||
@ -123,9 +112,9 @@ static void VNXOR1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR2w(void)
|
||||
@ -136,9 +125,9 @@ static void VNXOR2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR3w(void)
|
||||
@ -149,9 +138,9 @@ static void VNXOR3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR4w(void)
|
||||
@ -162,9 +151,9 @@ static void VNXOR4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR5w(void)
|
||||
@ -175,9 +164,9 @@ static void VNXOR5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR6w(void)
|
||||
@ -188,9 +177,9 @@ static void VNXOR6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VNXOR7w(void)
|
||||
@ -201,8 +190,8 @@ static void VNXOR7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
71
vu/vor.h
71
vu/vor.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VOR(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = VR[vs][i] | VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VOR_v(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,9 +8,9 @@ static void VOR_v(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][i];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR0q(void)
|
||||
@ -32,9 +21,9 @@ static void VOR0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR1q(void)
|
||||
@ -45,9 +34,9 @@ static void VOR1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR0h(void)
|
||||
@ -58,9 +47,9 @@ static void VOR0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR1h(void)
|
||||
@ -71,9 +60,9 @@ static void VOR1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR2h(void)
|
||||
@ -84,9 +73,9 @@ static void VOR2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR3h(void)
|
||||
@ -97,9 +86,9 @@ static void VOR3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR0w(void)
|
||||
@ -110,9 +99,9 @@ static void VOR0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR1w(void)
|
||||
@ -123,9 +112,9 @@ static void VOR1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR2w(void)
|
||||
@ -136,9 +125,9 @@ static void VOR2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR3w(void)
|
||||
@ -149,9 +138,9 @@ static void VOR3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR4w(void)
|
||||
@ -162,9 +151,9 @@ static void VOR4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR5w(void)
|
||||
@ -175,9 +164,9 @@ static void VOR5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR6w(void)
|
||||
@ -188,9 +177,9 @@ static void VOR6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VOR7w(void)
|
||||
@ -201,8 +190,8 @@ static void VOR7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
69
vu/vrcp.h
69
vu/vrcp.h
@ -1,43 +1,6 @@
|
||||
#include "vu.h"
|
||||
#include "divrom.h"
|
||||
|
||||
static void VRCP(int vd, int de, int vt, int e)
|
||||
{
|
||||
unsigned int addr;
|
||||
int data;
|
||||
int fetch;
|
||||
int shift = 32;
|
||||
|
||||
DivIn = (int)VR[vt][e & 07];
|
||||
data = DivIn;
|
||||
if (data < 0)
|
||||
data = -data;
|
||||
do
|
||||
{
|
||||
--shift;
|
||||
if (data & (1 << shift))
|
||||
goto FOUND_MSB;
|
||||
} while (shift); /* while (shift > 0) or ((shift ^ 31) < 32) */
|
||||
shift = 16 ^ 31; /* No bits found in (data == 0x00000000), so shift = 16. */
|
||||
FOUND_MSB:
|
||||
shift ^= 31; /* Right-to-left shift direction conversion. */
|
||||
addr = (data << shift) >> 22;
|
||||
fetch = div_ROM[addr &= 0x000001FF];
|
||||
shift ^= 31; /* Flipped shift direction back to right-. */
|
||||
DivOut = (0x40000000 | (fetch << 14)) >> shift;
|
||||
if (DivIn < 0)
|
||||
DivOut = ~DivOut;
|
||||
else if (DivIn == 0) /* corner case: overflow via division by zero */
|
||||
DivOut = 0x7FFFFFFF;
|
||||
else if (DivIn == -32768) /* corner case: signed underflow barrier */
|
||||
DivOut = 0xFFFF0000;
|
||||
for (addr = 0; addr < N; addr++)
|
||||
VACC[addr].s[LO] = VR_T(addr);
|
||||
VR_D(de &= 07) = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_rcp(int data)
|
||||
{
|
||||
unsigned int addr;
|
||||
@ -78,7 +41,7 @@ static void VRCPv0(void)
|
||||
DivIn = (int)VR[vt][00];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -93,7 +56,7 @@ static void VRCPv1(void)
|
||||
DivIn = (int)VR[vt][01];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -108,7 +71,7 @@ static void VRCP0q(void)
|
||||
DivIn = (int)VR[vt][02];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -123,7 +86,7 @@ static void VRCP1q(void)
|
||||
DivIn = (int)VR[vt][03];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -138,7 +101,7 @@ static void VRCP0h(void)
|
||||
DivIn = (int)VR[vt][04];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -153,7 +116,7 @@ static void VRCP1h(void)
|
||||
DivIn = (int)VR[vt][05];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -168,7 +131,7 @@ static void VRCP2h(void)
|
||||
DivIn = (int)VR[vt][06];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -183,7 +146,7 @@ static void VRCP3h(void)
|
||||
DivIn = (int)VR[vt][07];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -198,7 +161,7 @@ static void VRCP0w(void)
|
||||
DivIn = (int)VR[vt][00];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -213,7 +176,7 @@ static void VRCP1w(void)
|
||||
DivIn = (int)VR[vt][01];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -228,7 +191,7 @@ static void VRCP2w(void)
|
||||
DivIn = (int)VR[vt][02];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -243,7 +206,7 @@ static void VRCP3w(void)
|
||||
DivIn = (int)VR[vt][03];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -258,7 +221,7 @@ static void VRCP4w(void)
|
||||
DivIn = (int)VR[vt][04];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -273,7 +236,7 @@ static void VRCP5w(void)
|
||||
DivIn = (int)VR[vt][05];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -288,7 +251,7 @@ static void VRCP6w(void)
|
||||
DivIn = (int)VR[vt][06];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -303,7 +266,7 @@ static void VRCP7w(void)
|
||||
DivIn = (int)VR[vt][07];
|
||||
do_rcp(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
|
44
vu/vrcph.h
44
vu/vrcph.h
@ -1,18 +1,6 @@
|
||||
#include "vu.h"
|
||||
#include "divrom.h"
|
||||
|
||||
static void VRCPH(int vd, int de, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
DivIn = VR[vt][e & 07] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR_T(i);
|
||||
VR_D(de &= 07) = DivOut >> 16; /* store high part */
|
||||
DPH = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
static void VRCPHv0(void)
|
||||
{
|
||||
register int i;
|
||||
@ -22,7 +10,7 @@ static void VRCPHv0(void)
|
||||
|
||||
DivIn = VR[vt][00] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -36,7 +24,7 @@ static void VRCPHv1(void)
|
||||
|
||||
DivIn = VR[vt][01] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -50,7 +38,7 @@ static void VRCPH0q(void)
|
||||
|
||||
DivIn = VR[vt][02] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -64,7 +52,7 @@ static void VRCPH1q(void)
|
||||
|
||||
DivIn = VR[vt][03] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -78,7 +66,7 @@ static void VRCPH0h(void)
|
||||
|
||||
DivIn = VR[vt][04] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -92,7 +80,7 @@ static void VRCPH1h(void)
|
||||
|
||||
DivIn = VR[vt][05] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -106,7 +94,7 @@ static void VRCPH2h(void)
|
||||
|
||||
DivIn = VR[vt][06] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -120,7 +108,7 @@ static void VRCPH3h(void)
|
||||
|
||||
DivIn = VR[vt][07] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -134,7 +122,7 @@ static void VRCPH0w(void)
|
||||
|
||||
DivIn = VR[vt][00] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -148,7 +136,7 @@ static void VRCPH1w(void)
|
||||
|
||||
DivIn = VR[vt][01] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -162,7 +150,7 @@ static void VRCPH2w(void)
|
||||
|
||||
DivIn = VR[vt][02] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -176,7 +164,7 @@ static void VRCPH3w(void)
|
||||
|
||||
DivIn = VR[vt][03] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -190,7 +178,7 @@ static void VRCPH4w(void)
|
||||
|
||||
DivIn = VR[vt][04] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -204,7 +192,7 @@ static void VRCPH5w(void)
|
||||
|
||||
DivIn = VR[vt][05] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -218,7 +206,7 @@ static void VRCPH6w(void)
|
||||
|
||||
DivIn = VR[vt][06] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -232,7 +220,7 @@ static void VRCPH7w(void)
|
||||
|
||||
DivIn = VR[vt][07] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
|
72
vu/vrcpl.h
72
vu/vrcpl.h
@ -1,46 +1,6 @@
|
||||
#include "vu.h"
|
||||
#include "divrom.h"
|
||||
|
||||
static void VRCPL(int vd, int de, int vt, int e)
|
||||
{
|
||||
unsigned int addr;
|
||||
int data;
|
||||
int fetch;
|
||||
int shift = 32;
|
||||
|
||||
if (DPH)
|
||||
DivIn |= (unsigned short)VR[vt][e & 07];
|
||||
else
|
||||
DivIn = VR[vt][e & 07] & 0x0000FFFF; /* Do not sign-extend. */
|
||||
data = DivIn;
|
||||
if (data < 0)
|
||||
data = -data - (data < -32768); /* -(x) if >=; ~(x) if < */
|
||||
do
|
||||
{
|
||||
--shift;
|
||||
if (data & (1 << shift))
|
||||
goto FOUND_MSB;
|
||||
} while (shift); /* while (shift > 0) or ((shift ^ 31) < 32) */
|
||||
shift = 31 - 16*DPH; /* if (data == 0) shift = DPH ? 16 ^ 31 : 0 ^ 31; */
|
||||
FOUND_MSB:
|
||||
shift ^= 31; /* Right-to-left shift direction conversion. */
|
||||
addr = (data << shift) >> 22;
|
||||
fetch = div_ROM[addr &= 0x000001FF];
|
||||
shift ^= 31; /* Flipped shift direction back to right-. */
|
||||
DivOut = (0x40000000 | (fetch << 14)) >> shift;
|
||||
if (DivIn < 0)
|
||||
DivOut = ~DivOut;
|
||||
else if (DivIn == 0) /* corner case: overflow via division by zero */
|
||||
DivOut = 0x7FFFFFFF;
|
||||
else if (DivIn == -32768) /* corner case: signed underflow barrier */
|
||||
DivOut = 0xFFFF0000;
|
||||
for (addr = 0; addr < N; addr++)
|
||||
VACC[addr].s[LO] = VR_T(addr);
|
||||
VR_D(de &= 07) = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_rcpl(int data)
|
||||
{
|
||||
unsigned int addr;
|
||||
@ -82,7 +42,7 @@ static void VRCPLv0(void)
|
||||
DivIn |= (unsigned short)VR[vt][00];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x0 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vd][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -98,7 +58,7 @@ static void VRCPLv1(void)
|
||||
DivIn |= (unsigned short)VR[vt][01];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x1 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vd][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -114,7 +74,7 @@ static void VRCPL0q(void)
|
||||
DivIn |= (unsigned short)VR[vt][02];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x2 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vd][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -130,7 +90,7 @@ static void VRCPL1q(void)
|
||||
DivIn |= (unsigned short)VR[vt][03];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x3 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vd][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -146,7 +106,7 @@ static void VRCPL0h(void)
|
||||
DivIn |= (unsigned short)VR[vt][04];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x4 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vd][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -162,7 +122,7 @@ static void VRCPL1h(void)
|
||||
DivIn |= (unsigned short)VR[vt][05];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x5 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vd][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -178,7 +138,7 @@ static void VRCPL2h(void)
|
||||
DivIn |= (unsigned short)VR[vt][06];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x6 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vd][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -194,7 +154,7 @@ static void VRCPL3h(void)
|
||||
DivIn |= (unsigned short)VR[vt][07];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x7 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vd][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -210,7 +170,7 @@ static void VRCPL0w(void)
|
||||
DivIn |= (unsigned short)VR[vt][00];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x8 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -226,7 +186,7 @@ static void VRCPL1w(void)
|
||||
DivIn |= (unsigned short)VR[vt][01];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0x9 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -242,7 +202,7 @@ static void VRCPL2w(void)
|
||||
DivIn |= (unsigned short)VR[vt][02];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0xA & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -258,7 +218,7 @@ static void VRCPL3w(void)
|
||||
DivIn |= (unsigned short)VR[vt][03];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0xB & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -274,7 +234,7 @@ static void VRCPL4w(void)
|
||||
DivIn |= (unsigned short)VR[vt][04];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0xC & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -290,7 +250,7 @@ static void VRCPL5w(void)
|
||||
DivIn |= (unsigned short)VR[vt][05];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0xD & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -306,7 +266,7 @@ static void VRCPL6w(void)
|
||||
DivIn |= (unsigned short)VR[vt][06];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0xE & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -322,7 +282,7 @@ static void VRCPL7w(void)
|
||||
DivIn |= (unsigned short)VR[vt][07];
|
||||
do_rcpl(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vd][(0xF & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vd][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
|
44
vu/vrsqh.h
44
vu/vrsqh.h
@ -1,18 +1,6 @@
|
||||
#include "vu.h"
|
||||
#include "divrom.h"
|
||||
|
||||
static void VRSQH(int vd, int de, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
DivIn = VR[vt][e & 07] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR_T(i);
|
||||
VR_D(de &= 07) = DivOut >> 16; /* store high part */
|
||||
DPH = 1;
|
||||
return;
|
||||
}
|
||||
|
||||
static void VRSQHv0(void)
|
||||
{
|
||||
register int i;
|
||||
@ -22,7 +10,7 @@ static void VRSQHv0(void)
|
||||
|
||||
DivIn = VR[vt][00] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -36,7 +24,7 @@ static void VRSQHv1(void)
|
||||
|
||||
DivIn = VR[vt][01] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -50,7 +38,7 @@ static void VRSQH0q(void)
|
||||
|
||||
DivIn = VR[vt][02] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -64,7 +52,7 @@ static void VRSQH1q(void)
|
||||
|
||||
DivIn = VR[vt][03] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -78,7 +66,7 @@ static void VRSQH0h(void)
|
||||
|
||||
DivIn = VR[vt][04] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -92,7 +80,7 @@ static void VRSQH1h(void)
|
||||
|
||||
DivIn = VR[vt][05] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -106,7 +94,7 @@ static void VRSQH2h(void)
|
||||
|
||||
DivIn = VR[vt][06] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -120,7 +108,7 @@ static void VRSQH3h(void)
|
||||
|
||||
DivIn = VR[vt][07] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -134,7 +122,7 @@ static void VRSQH0w(void)
|
||||
|
||||
DivIn = VR[vt][00] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -148,7 +136,7 @@ static void VRSQH1w(void)
|
||||
|
||||
DivIn = VR[vt][01] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -162,7 +150,7 @@ static void VRSQH2w(void)
|
||||
|
||||
DivIn = VR[vt][02] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -176,7 +164,7 @@ static void VRSQH3w(void)
|
||||
|
||||
DivIn = VR[vt][03] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -190,7 +178,7 @@ static void VRSQH4w(void)
|
||||
|
||||
DivIn = VR[vt][04] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -204,7 +192,7 @@ static void VRSQH5w(void)
|
||||
|
||||
DivIn = VR[vt][05] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -218,7 +206,7 @@ static void VRSQH6w(void)
|
||||
|
||||
DivIn = VR[vt][06] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
@ -232,7 +220,7 @@ static void VRSQH7w(void)
|
||||
|
||||
DivIn = VR[vt][07] << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = DivOut >> 16;
|
||||
DPH = 1;
|
||||
return;
|
||||
|
75
vu/vrsql.h
75
vu/vrsql.h
@ -1,49 +1,6 @@
|
||||
#include "vu.h"
|
||||
#include "divrom.h"
|
||||
|
||||
static void VRSQL(int vd, int de, int vt, int e)
|
||||
{
|
||||
unsigned int addr;
|
||||
int data;
|
||||
int fetch;
|
||||
int shift = 32;
|
||||
|
||||
if (DPH)
|
||||
DivIn |= (unsigned short)VR[vt][e & 07];
|
||||
else
|
||||
DivIn = VR[vt][e & 07] & 0x0000FFFF; /* Do not sign-extend. */
|
||||
data = DivIn;
|
||||
if (data < 0)
|
||||
data = -data - (data < -32768); /* -(x) if >=; ~(x) if < */
|
||||
do
|
||||
{
|
||||
--shift;
|
||||
if (data & (1 << shift))
|
||||
goto FOUND_MSB;
|
||||
} while (shift); /* while (shift > 0) or ((shift ^ 31) < 32) */
|
||||
shift = 31 - 16*DPH; /* if (data == 0) shift = DPH ? 16 ^ 31 : 0 ^ 31; */
|
||||
FOUND_MSB:
|
||||
shift ^= 31; /* Right-to-left shift direction conversion. */
|
||||
addr = (data << shift) >> 22;
|
||||
addr &= 0x000001FE;
|
||||
addr |= 0x00000200 | (shift & 1);
|
||||
fetch = div_ROM[addr];
|
||||
shift ^= 31; /* Flipped shift direction back to right-. */
|
||||
shift >>= 1;
|
||||
DivOut = (0x40000000 | (fetch << 14)) >> shift;
|
||||
if (DivIn < 0)
|
||||
DivOut = ~DivOut;
|
||||
else if (DivIn == 0) /* corner case: overflow via division by zero */
|
||||
DivOut = 0x7FFFFFFF;
|
||||
else if (DivIn == -32768) /* corner case: signed underflow barrier */
|
||||
DivOut = 0xFFFF0000;
|
||||
for (addr = 0; addr < N; addr++)
|
||||
VACC[addr].s[LO] = VR_T(addr);
|
||||
VR_D(de &= 07) = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
}
|
||||
|
||||
void do_rsql(int data)
|
||||
{
|
||||
unsigned int addr;
|
||||
@ -88,7 +45,7 @@ static void VRSQLv0(void)
|
||||
DivIn |= (unsigned short)VR[vt][00];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -104,7 +61,7 @@ static void VRSQLv1(void)
|
||||
DivIn |= (unsigned short)VR[vt][01];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -120,7 +77,7 @@ static void VRSQL0q(void)
|
||||
DivIn |= (unsigned short)VR[vt][02];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -136,7 +93,7 @@ static void VRSQL1q(void)
|
||||
DivIn |= (unsigned short)VR[vt][03];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -152,7 +109,7 @@ static void VRSQL0h(void)
|
||||
DivIn |= (unsigned short)VR[vt][04];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -168,7 +125,7 @@ static void VRSQL1h(void)
|
||||
DivIn |= (unsigned short)VR[vt][05];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -184,7 +141,7 @@ static void VRSQL2h(void)
|
||||
DivIn |= (unsigned short)VR[vt][06];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -200,7 +157,7 @@ static void VRSQL3h(void)
|
||||
DivIn |= (unsigned short)VR[vt][07];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -216,7 +173,7 @@ static void VRSQL0w(void)
|
||||
DivIn |= (unsigned short)VR[vt][00];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -232,7 +189,7 @@ static void VRSQL1w(void)
|
||||
DivIn |= (unsigned short)VR[vt][01];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -248,7 +205,7 @@ static void VRSQL2w(void)
|
||||
DivIn |= (unsigned short)VR[vt][02];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -264,7 +221,7 @@ static void VRSQL3w(void)
|
||||
DivIn |= (unsigned short)VR[vt][03];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -280,7 +237,7 @@ static void VRSQL4w(void)
|
||||
DivIn |= (unsigned short)VR[vt][04];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -296,7 +253,7 @@ static void VRSQL5w(void)
|
||||
DivIn |= (unsigned short)VR[vt][05];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -312,7 +269,7 @@ static void VRSQL6w(void)
|
||||
DivIn |= (unsigned short)VR[vt][06];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
@ -328,7 +285,7 @@ static void VRSQL7w(void)
|
||||
DivIn |= (unsigned short)VR[vt][07];
|
||||
do_rsql(DivIn);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
|
||||
VR[vd][de] = (short)DivOut;
|
||||
DPH = 0;
|
||||
return;
|
||||
|
41
vu/vsaw.h
41
vu/vsaw.h
@ -1,15 +1,16 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VSAW(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
#ifdef VU_EMULATE_SCALAR_ACCUMULATOR_READ
|
||||
static void VSAR(int vd, int vs, int vt, int e)
|
||||
{
|
||||
short oldval[N];
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = VR[vs][i];
|
||||
#endif
|
||||
vs = vt = 0;
|
||||
/* Even though `vt` is ignored in VSAR, according to official sources as well
|
||||
* as reversing, lots of games seem to specify it as nonzero, possibly to
|
||||
oldval[i] = VR[vs][i];
|
||||
vt = 0;
|
||||
/* Even though VT is ignored in VSAR, according to official sources as well
|
||||
* as reversing, lots of games seem to specify it as non-zero, possibly to
|
||||
* avoid register stalling or other VU hazards. Not really certain why yet.
|
||||
*/
|
||||
e ^= 0x8;
|
||||
@ -17,24 +18,20 @@ static void VSAW(int vd, int vs, int vt, int e)
|
||||
* Currently this code is safer because &= is less likely to catch oddities.
|
||||
* Either way, documentation shows that the switch range is 0:2, not 8:A.
|
||||
*/
|
||||
e = 2 - e;
|
||||
if (e < 0)
|
||||
if (e > 2)
|
||||
{
|
||||
message("VSAR\nInvalid mask.", 2);
|
||||
for (i = vs; i < 8; i++)
|
||||
VR_D(i) = 0x0000; /* override behavior (zilmar) */
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = 0x0000; /* override behavior (zilmar) */
|
||||
}
|
||||
else
|
||||
for (i = vs; i < 8; i++)
|
||||
VR_D(i) = VACC[i].s[e];
|
||||
#ifdef VU_EMULATE_SCALAR_ACCUMULATOR_READ
|
||||
e ^= 03;
|
||||
--e;
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[e][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[e] = result[i]; /* ... = VR[vs][i]; */
|
||||
#endif
|
||||
VACC[e][i] = oldval[i]; /* ... = VS */
|
||||
return;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void VSAWH(void)
|
||||
{
|
||||
@ -42,7 +39,7 @@ static void VSAWH(void)
|
||||
const int vd = inst.R.sa;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[HI];
|
||||
VR[vd][i] = ACC_H(i);
|
||||
return;
|
||||
}
|
||||
static void VSAWM(void)
|
||||
@ -51,7 +48,7 @@ static void VSAWM(void)
|
||||
const int vd = inst.R.sa;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[MD];
|
||||
VR[vd][i] = ACC_M(i);
|
||||
return;
|
||||
}
|
||||
static void VSAWL(void)
|
||||
@ -60,6 +57,6 @@ static void VSAWL(void)
|
||||
const int vd = inst.R.sa;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
48
vu/vsub.h
48
vu/vsub.h
@ -1,23 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VSUB(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++) /* Try to vectorize the subtracts to be parallel. */
|
||||
result[i] = VR[vs][i] - VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
result[i] -= VCO & 0x0001;
|
||||
VCO >>= 1;
|
||||
}
|
||||
VCO = 0x0000; /* Clear the remaining, upper NOTEQUAL bits. */
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)result[i];
|
||||
SIGNED_CLAMP(VMUL_PTR, SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
|
||||
void clr_bi(void) /* clear CARRY and borrow in to accumulators */
|
||||
{
|
||||
int bi[8];
|
||||
@ -41,7 +23,7 @@ static void VSUB_v(void)
|
||||
result[i] = VR[vs][i] - VR[vt][i];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -56,7 +38,7 @@ static void VSUB0q(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -71,7 +53,7 @@ static void VSUB1q(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -86,7 +68,7 @@ static void VSUB0h(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -101,7 +83,7 @@ static void VSUB1h(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -116,7 +98,7 @@ static void VSUB2h(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -131,7 +113,7 @@ static void VSUB3h(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -146,7 +128,7 @@ static void VSUB0w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -161,7 +143,7 @@ static void VSUB1w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -176,7 +158,7 @@ static void VSUB2w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -191,7 +173,7 @@ static void VSUB3w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -206,7 +188,7 @@ static void VSUB4w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -221,7 +203,7 @@ static void VSUB5w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -236,7 +218,7 @@ static void VSUB6w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
@ -251,7 +233,7 @@ static void VSUB7w(void)
|
||||
result[i] = VR[vs][i] - VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
clr_bi();
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
SIGNED_CLAMP(VR[vd], SM_ADD_A);
|
||||
return;
|
||||
}
|
||||
|
80
vu/vsubc.h
80
vu/vsubc.h
@ -1,25 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VSUBC(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
VCO = 0x0000;
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = (unsigned short)VR[vs][i] - (unsigned short)VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = (short)result[i];
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
if (result[i] == 0) continue; /* If VS == VT, neither flag is set. */
|
||||
VCO |= (result[i] < 0) << i; /* CARRY, because VS - VT < 0 */
|
||||
VCO |= (0x01 << 8) << i; /* NOTEQUAL, because VS - VT != 0 */
|
||||
}
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
#if (0)
|
||||
#define SETBI(i) (result[i] < 0)
|
||||
#else
|
||||
@ -62,9 +42,9 @@ static void VSUBC_v(void)
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = (unsigned short)(VR[vs][i]) - (unsigned short)(VR[vt][i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -80,9 +60,9 @@ static void VSUBC0q(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -98,9 +78,9 @@ static void VSUBC1q(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -116,9 +96,9 @@ static void VSUBC0h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -134,9 +114,9 @@ static void VSUBC1h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -152,9 +132,9 @@ static void VSUBC2h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -170,9 +150,9 @@ static void VSUBC3h(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -188,9 +168,9 @@ static void VSUBC0w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -206,9 +186,9 @@ static void VSUBC1w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -224,9 +204,9 @@ static void VSUBC2w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -242,9 +222,9 @@ static void VSUBC3w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -260,9 +240,9 @@ static void VSUBC4w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -278,9 +258,9 @@ static void VSUBC5w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -296,9 +276,9 @@ static void VSUBC6w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
@ -314,9 +294,9 @@ static void VSUBC7w(void)
|
||||
(unsigned short)(VR[vs][i])
|
||||
- (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = (short)(result[i]);
|
||||
ACC_L(i) = (short)(result[i]);
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
set_bo();
|
||||
return;
|
||||
}
|
||||
|
119
vu/vu.h
119
vu/vu.h
@ -1,7 +1,7 @@
|
||||
/******************************************************************************\
|
||||
* Project: MSP Emulation Layer for Vector Unit Computational Operations *
|
||||
* Authors: Iconoclast *
|
||||
* Release: 2013.09.11 *
|
||||
* Release: 2013.09.13 *
|
||||
* License: none (public domain) *
|
||||
\******************************************************************************/
|
||||
#ifndef _VU_H
|
||||
@ -12,6 +12,8 @@
|
||||
#define MACHINE_SIZE_48_MIN
|
||||
#endif
|
||||
|
||||
typedef long long INT64;
|
||||
|
||||
/*
|
||||
* vector-scalar element decoding
|
||||
*
|
||||
@ -47,6 +49,9 @@ static const int ei[16][8] = {
|
||||
{ 07, 07, 07, 07, 07, 07, 07, 07 } /* 7 */
|
||||
};
|
||||
|
||||
#define N 8
|
||||
/* N: number of processor elements in SIMD processor */
|
||||
|
||||
/*
|
||||
* RSP virtual registers (of vector unit)
|
||||
* The most important are the 32 general-purpose vector registers.
|
||||
@ -55,8 +60,8 @@ static const int ei[16][8] = {
|
||||
* For ?WC2 we may need to do byte-precision access just as directly.
|
||||
* This is amended by using the `VU_S` and `VU_B` macros defined in `rsp.h`.
|
||||
*/
|
||||
short VR[32][8];
|
||||
short VC[8]; /* vector/scalar coefficient */
|
||||
short VR[32][N];
|
||||
short VC[N]; /* vector/scalar coefficient */
|
||||
|
||||
/* #define EMULATE_VECTOR_RESULT_BUFFER */
|
||||
/*
|
||||
@ -120,9 +125,6 @@ int sub_mask[16] = {
|
||||
0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7
|
||||
};
|
||||
|
||||
#define N 8
|
||||
/* N: number of processor elements in SIMD processor */
|
||||
|
||||
void SHUFFLE_VECTOR(int vt, int e)
|
||||
{
|
||||
register int i, j;
|
||||
@ -149,19 +151,15 @@ void SHUFFLE_VECTOR(int vt, int e)
|
||||
return;
|
||||
}
|
||||
|
||||
typedef long long INT64;
|
||||
|
||||
#if (0)
|
||||
/*
|
||||
* accumulator-indexing macros
|
||||
* accumulator-indexing macros (little endian: not suitable for VSAW)
|
||||
*/
|
||||
#define LO 00
|
||||
#define MD 01
|
||||
#define HI 02
|
||||
#define HI 02
|
||||
#define MD 01
|
||||
#define LO 00
|
||||
|
||||
static union ACC {
|
||||
#ifdef MACHINE_SIZE_48_MIN
|
||||
signed e: 48; /* There are eight elements in the accumulator. */
|
||||
#endif
|
||||
short int s[3]; /* Each element has a low, middle, and high 16-bit slice. */
|
||||
signed char SB[6];
|
||||
/* 64-bit access: */
|
||||
@ -170,27 +168,32 @@ static union ACC {
|
||||
unsigned short UHW[4];
|
||||
int W[2];
|
||||
unsigned int UW[2];
|
||||
long long int DW;
|
||||
unsigned long long UDW;
|
||||
} VACC[8];
|
||||
INT64 DW;
|
||||
} VACC[N];
|
||||
#define ACC_L(i) (VACC[i].s[LO])
|
||||
#define ACC_M(i) (VACC[i].s[MD])
|
||||
#define ACC_H(i) (VACC[i].s[HI])
|
||||
|
||||
#else
|
||||
/*
|
||||
* special macro service for clamping accumulators
|
||||
*
|
||||
* Clamping on the RSP is the same as traditional vector units, not just SGI.
|
||||
* This algorithm, therefore, is public domain material.
|
||||
*
|
||||
* In almost all cases, the RSP requests clamping to bits 47..16 of each acc.
|
||||
* We therefore compare the 32-bit (signed int)(acc >> 16) and clamp it down
|
||||
* to, usually, 16-bit results (0x8000 if < -32768, 0x7FFF if > +32767).
|
||||
*
|
||||
* The exception is VMACQ, which requests a clamp index lsb of >> 17.
|
||||
* accumulator-indexing macros (inverted access dimensions, suited for SSE)
|
||||
*/
|
||||
#define CLAMP_BASE(acc, lo) ((signed int)(VACC[acc].DW >> lo))
|
||||
#define HI 00
|
||||
#define MD 01
|
||||
#define LO 02
|
||||
|
||||
short VACC[3][N];
|
||||
/*
|
||||
* This algorithm might have a bug if you invoke shifts greater than 16,
|
||||
* because the 48-bit acc needs to be sign-extended when shifting right here.
|
||||
* short ACC_L[N];
|
||||
* short ACC_M[N];
|
||||
* short ACC_H[N];
|
||||
*/
|
||||
#define ACC_L(i) (VACC[LO][i])
|
||||
#define ACC_M(i) (VACC[MD][i])
|
||||
#define ACC_H(i) (VACC[HI][i])
|
||||
|
||||
#endif
|
||||
|
||||
#define FORCE_STATIC_CLAMP
|
||||
static signed short sclamp[2][2] = {
|
||||
{ 0x0000, -0x8000},
|
||||
@ -220,6 +223,39 @@ enum {
|
||||
|
||||
signed int result[N];
|
||||
|
||||
/*
 * Split each of the N wide values in `acc` into its three 16-bit slices and
 * write them to the accumulator:  bits 47..32 to ACC_H, bits 31..16 to ACC_M
 * and bits 15..0 to ACC_L.  Anything above bit 47 is discarded.
 */
INLINE void do_store(INT64* acc)
{
    register int lane;

    for (lane = 0; lane < N; lane++)
    {
        const INT64 wide = acc[lane];

        ACC_H(lane) = (short)(wide >> 32);
        ACC_M(lane) = (short)(wide >> 16);
        ACC_L(lane) = (short)(wide >>  0);
    }
    return;
}
|
||||
/*
 * Accumulate:  add the N wide values in `acc` to the current accumulator
 * and store the sums back with do_store().
 *
 * Each accumulator element is first rebuilt into a 64-bit integer from its
 * three 16-bit slices.  ACC_H is taken signed so the element keeps its sign;
 * ACC_M and ACC_L are taken as unsigned so they do not smear sign bits over
 * the slices above them.
 *
 * Every step indexes base[i].  The previous code wrote base[N] in each loop,
 * which is one element past the end of the array and left base[0..N-1]
 * uninitialized for do_store() to read.
 */
INLINE void do_acc(INT64* acc)
{
    INT64 base[N];
    register int i;

    for (i = 0; i < N; i++)
        base[i] = ACC_H(i);
    for (i = 0; i < N; i++)
        base[i] = base[i] << 16;
    for (i = 0; i < N; i++)
        base[i] = base[i] | (unsigned short)ACC_M(i);
    for (i = 0; i < N; i++)
        base[i] = base[i] << 16;
    for (i = 0; i < N; i++)
        base[i] = base[i] | (unsigned short)ACC_L(i);
    for (i = 0; i < N; i++)
        base[i] = base[i] + acc[i];
    do_store(base);
    return;
}
|
||||
|
||||
void SIGNED_CLAMP(short* VD, int mode)
|
||||
{
|
||||
register int i;
|
||||
@ -227,9 +263,12 @@ void SIGNED_CLAMP(short* VD, int mode)
|
||||
switch (mode)
|
||||
{
|
||||
case SM_MUL_X: /* typical sign-clamp of accumulator-mid (bits 31:16) */
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = ACC_H(i) << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = result[i] | (unsigned short)ACC_M(i);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
result[i] = *(signed int *)((unsigned char *)(VACC + i) + 2);
|
||||
#ifdef FORCE_STATIC_CLAMP
|
||||
VD[i] = result[i] & 0x0000FFFF;
|
||||
VD[i] &= ~(result[i] - -32768) >> 31; /* min: 0x8000 ^ 0x8000 */
|
||||
@ -244,11 +283,14 @@ void SIGNED_CLAMP(short* VD, int mode)
|
||||
}
|
||||
return;
|
||||
case SM_MUL_Z: /* sign-clamp accumulator-low (bits 15:0) */
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = ACC_H(i) << 16;
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = result[i] | (unsigned short)ACC_M(i);
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
result[i] = *(signed int *)((unsigned char *)(VACC + i) + 2);
|
||||
#ifdef FORCE_STATIC_CLAMP
|
||||
VD[i] = VACC[i].DW & 0x00000000FFFF;
|
||||
VD[i] = ACC_L(i);
|
||||
VD[i] &= ~(result[i] - -32768) >> 31;
|
||||
VD[i] |= (+32767 - result[i]) >> 31;
|
||||
continue;
|
||||
@ -262,15 +304,18 @@ void SIGNED_CLAMP(short* VD, int mode)
|
||||
return;
|
||||
case SM_MUL_Q: /* possible DCT inverse quantization (VMACQ only) */
|
||||
for (i = 0; i < N; i++)
|
||||
{
|
||||
result[i] = CLAMP_BASE(i, 17);
|
||||
result[i] = (short)(ACC_H(i) << 31);
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = result[i] | (ACC_M(i) << 15);
|
||||
for (i = 0; i < N; i++)
|
||||
result[i] = result[i] | ((unsigned short)ACC_L(i) >> 1);
|
||||
for (i = 0; i < N; i++)
|
||||
if (result[i] < -32768)
|
||||
VD[i] = -32768 & ~0x000F;
|
||||
else if (result[i] > +32767)
|
||||
VD[i] = +32767 & ~0x000F;
|
||||
else
|
||||
VD[i] = result[i] & 0x0000FFF0;
|
||||
}
|
||||
return;
|
||||
case SM_ADD_A: /* VADD and VSUB */
|
||||
for (i = 0; i < N; i++)
|
||||
|
71
vu/vxor.h
71
vu/vxor.h
@ -1,16 +1,5 @@
|
||||
#include "vu.h"
|
||||
|
||||
static void VXOR(int vd, int vs, int vt, int e)
|
||||
{
|
||||
register int i;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_R(i) = VR[vs][i] ^ VR_T(i);
|
||||
for (i = 0; i < N; i++)
|
||||
ACC_W(i) = ACC_R(i);
|
||||
return;
|
||||
}
|
||||
|
||||
static void VXOR_v(void)
|
||||
{
|
||||
register int i;
|
||||
@ -19,9 +8,9 @@ static void VXOR_v(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][i];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][i];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR0q(void)
|
||||
@ -32,9 +21,9 @@ static void VXOR0q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR1q(void)
|
||||
@ -45,9 +34,9 @@ static void VXOR1q(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR0h(void)
|
||||
@ -58,9 +47,9 @@ static void VXOR0h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR1h(void)
|
||||
@ -71,9 +60,9 @@ static void VXOR1h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR2h(void)
|
||||
@ -84,9 +73,9 @@ static void VXOR2h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR3h(void)
|
||||
@ -97,9 +86,9 @@ static void VXOR3h(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR0w(void)
|
||||
@ -110,9 +99,9 @@ static void VXOR0w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR1w(void)
|
||||
@ -123,9 +112,9 @@ static void VXOR1w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR2w(void)
|
||||
@ -136,9 +125,9 @@ static void VXOR2w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR3w(void)
|
||||
@ -149,9 +138,9 @@ static void VXOR3w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR4w(void)
|
||||
@ -162,9 +151,9 @@ static void VXOR4w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR5w(void)
|
||||
@ -175,9 +164,9 @@ static void VXOR5w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR6w(void)
|
||||
@ -188,9 +177,9 @@ static void VXOR6w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
static void VXOR7w(void)
|
||||
@ -201,8 +190,8 @@ static void VXOR7w(void)
|
||||
const int vt = inst.R.rt;
|
||||
|
||||
for (i = 0; i < N; i++)
|
||||
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)];
|
||||
for (i = 0; i < N; i++)
|
||||
VR[vd][i] = VACC[i].s[LO];
|
||||
VR[vd][i] = ACC_L(i);
|
||||
return;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user