completely vectorized all accumulator R/W

This commit is contained in:
unknown 2013-09-13 15:04:06 -04:00
parent cd5c32e7af
commit 49bd94cd9f
41 changed files with 1290 additions and 2104 deletions

2
rsp.h
View File

@ -251,7 +251,7 @@ void trace_RSP_registers(void)
for (i = 0; i < 8; i++)
fprintf(
out, "ACC[%o]: [%04X][%04X][%04X]\n", i,
VACC[i].s[HI], VACC[i].s[MD], VACC[i].s[LO]);
ACC_H(i), ACC_M(i), ACC_L(i));
fprintf(out, "\n");
fprintf(out, "DivIn: %i\n", DivIn);
fprintf(out, "DivOut: %i\n", DivOut);

View File

@ -1,34 +1,5 @@
#include "vu.h"
/*
 * VABS:  per-lane sign transfer of the VT operand, steered by the sign of VS.
 *
 * For every one of the N lanes:
 *     VS <  0  ->  -VT   (negating -32768 yields +32767)
 *     VS == 0  ->   0
 *     VS >  0  ->  +VT
 * Lane results are staged in ACC_R() and afterwards copied to ACC_W().
 *
 * `vd`, `vt` and `e` are not named directly in this body; presumably the
 * VR_T()/ACC_W() macros consume them -- confirm against vu.h.
 */
static void VABS(int vd, int vs, int vt, int e)
{
    int i;

#ifdef FORCE_STATIC_CLAMP
    /* Branch-free form: derive the answer from 0/1 predicates on VS. */
    for (i = 0; i < N; i++)
    {
        const int vs_neg = (VR[vs][i] < 0);
        const int vs_set = (VR[vs][i] != 0);
        signed short lane = VR_T(i);

        lane ^= -vs_neg;                   /* lane = ~lane when VS < 0 */
        lane += vs_neg & (lane != 0x7FFF); /* abs(-32768) == +32767 */
        lane &= -vs_set;                   /* VS == 0 forces a zero lane */
        ACC_R(i) = lane;
    }
#else
    for (i = 0; i < N; i++)
    {
        if (VR[vs][i] == 0)
        {
            ACC_R(i) = 0x0000;
        }
        else if (VR[vs][i] > 0)
        {
            ACC_R(i) = +VR_T(i);
        }
        else
        {
            /* XOR with 1 maps -32768 to -32767 before the negation. */
            ACC_R(i) = -(VR_T(i) ^ (VR_T(i) == -32768));
        }
    }
#endif
    for (i = 0; i < N; i++)
    {
        ACC_W(i) = ACC_R(i);
    }
}
/*
* -1: VT *= -1, because VS < 0 // VT ^= -2 if even, or ^= -1, += 1
* 0: VT *= 0, because VS = 0 // VT ^= VT
@ -89,9 +60,9 @@ static void VABS_v(void)
result[i] = VR[vt][i];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS0q(void)
@ -105,9 +76,9 @@ static void VABS0q(void)
result[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS1q(void)
@ -121,9 +92,9 @@ static void VABS1q(void)
result[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS0h(void)
@ -137,9 +108,9 @@ static void VABS0h(void)
result[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS1h(void)
@ -153,9 +124,9 @@ static void VABS1h(void)
result[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS2h(void)
@ -169,9 +140,9 @@ static void VABS2h(void)
result[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS3h(void)
@ -185,9 +156,9 @@ static void VABS3h(void)
result[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS0w(void)
@ -201,9 +172,9 @@ static void VABS0w(void)
result[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS1w(void)
@ -217,9 +188,9 @@ static void VABS1w(void)
result[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS2w(void)
@ -233,9 +204,9 @@ static void VABS2w(void)
result[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS3w(void)
@ -249,9 +220,9 @@ static void VABS3w(void)
result[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS4w(void)
@ -265,9 +236,9 @@ static void VABS4w(void)
result[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS5w(void)
@ -281,9 +252,9 @@ static void VABS5w(void)
result[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS6w(void)
@ -297,9 +268,9 @@ static void VABS6w(void)
result[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VABS7w(void)
@ -313,8 +284,8 @@ static void VABS7w(void)
result[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_abs(vs);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,23 +1,5 @@
#include "vu.h"
/*
 * VADD:  per-lane add of VS, the VT operand and a carry-in bit.
 *
 * Lane i takes its carry-in from bit i of VCO.  Once the carries have been
 * consumed VCO is cleared, each sum is narrowed to short and stored in
 * VACC[i].s[LO], and SIGNED_CLAMP() is invoked with VMUL_PTR / SM_ADD_A.
 *
 * `vd`, `vt` and `e` are not named directly in this body; presumably the
 * VR_T()/VMUL_PTR macros consume them -- confirm against vu.h.
 */
static void VADD(int vd, int vs, int vt, int e)
{
    int i;

    /* Separate passes keep every loop a simple, vectorizable one. */
    for (i = 0; i < N; i++)
    {
        result[i] = VR[vs][i] + VR_T(i);
    }
    for (i = 0; i < N; i++)
    {
        result[i] += (VCO >> i) & 0x0001; /* carry-in for lane i */
    }

    VCO = 0x0000; /* Carry bits are spent; upper NOTEQUAL bits go as well. */

    for (i = 0; i < N; i++)
    {
        VACC[i].s[LO] = (short)result[i];
    }
    SIGNED_CLAMP(VMUL_PTR, SM_ADD_A);
}
void clr_ci(void) /* clear CARRY and carry in to accumulators */
{
int ci[8];
@ -41,7 +23,7 @@ static void VADD_v(void)
result[i] = VR[vs][i] + VR[vt][i];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -56,7 +38,7 @@ static void VADD0q(void)
result[i] = VR[vs][i] + VR[vt][(0x2 & 01) + (i & 0xE)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -71,7 +53,7 @@ static void VADD1q(void)
result[i] = VR[vs][i] + VR[vt][(0x3 & 01) + (i & 0xE)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -86,7 +68,7 @@ static void VADD0h(void)
result[i] = VR[vs][i] + VR[vt][(0x4 & 03) + (i & 0xC)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -101,7 +83,7 @@ static void VADD1h(void)
result[i] = VR[vs][i] + VR[vt][(0x5 & 03) + (i & 0xC)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -116,7 +98,7 @@ static void VADD2h(void)
result[i] = VR[vs][i] + VR[vt][(0x6 & 03) + (i & 0xC)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -131,7 +113,7 @@ static void VADD3h(void)
result[i] = VR[vs][i] + VR[vt][(0x7 & 03) + (i & 0xC)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -146,7 +128,7 @@ static void VADD0w(void)
result[i] = VR[vs][i] + VR[vt][(0x8 & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -161,7 +143,7 @@ static void VADD1w(void)
result[i] = VR[vs][i] + VR[vt][(0x9 & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -176,7 +158,7 @@ static void VADD2w(void)
result[i] = VR[vs][i] + VR[vt][(0xA & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -191,7 +173,7 @@ static void VADD3w(void)
result[i] = VR[vs][i] + VR[vt][(0xB & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -206,7 +188,7 @@ static void VADD4w(void)
result[i] = VR[vs][i] + VR[vt][(0xC & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -221,7 +203,7 @@ static void VADD5w(void)
result[i] = VR[vs][i] + VR[vt][(0xD & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -236,7 +218,7 @@ static void VADD6w(void)
result[i] = VR[vs][i] + VR[vt][(0xE & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -251,7 +233,7 @@ static void VADD7w(void)
result[i] = VR[vs][i] + VR[vt][(0xF & 07) + (i & 0x0)];
clr_ci();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}

View File

@ -1,21 +1,5 @@
#include "vu.h"
/*
 * VADDC:  per-lane add of VS and the VT operand as unsigned shorts,
 * recording the carry-out of every lane.
 *
 * VCO is cleared first; afterwards bit i of VCO is set when the sum of
 * lane i has any bit set above the low 16.  The sum narrowed to short is
 * staged in ACC_R() and finally copied to ACC_W().
 *
 * `vd`, `vt` and `e` are not named directly in this body; presumably the
 * VR_T()/ACC_W() macros consume them -- confirm against vu.h.
 */
static void VADDC(int vd, int vs, int vt, int e)
{
    int i;

    VCO = 0x0000;
    for (i = 0; i < N; i++)
    {
        result[i] = (unsigned short)VR[vs][i] + (unsigned short)VR_T(i);
    }
    for (i = 0; i < N; i++)
    {
        ACC_R(i) = (short)result[i];
        VCO |= ((result[i] & ~0x0000FFFF) != 0) << i; /* carry-out of lane i */
    }
    /* Write-back stays a separate pass, after every lane has been computed. */
    for (i = 0; i < N; i++)
    {
        ACC_W(i) = ACC_R(i);
    }
}
#if (0)
#define SETCO(i) (result[i] > 0x0000FFFF)
#elif (1)
@ -54,9 +38,9 @@ static void VADDC_v(void)
for (i = 0; i < N; i++)
result[i] = (unsigned short)(VR[vs][i]) + (unsigned short)(VR[vt][i]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -72,9 +56,9 @@ static void VADDC0q(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -90,9 +74,9 @@ static void VADDC1q(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -108,9 +92,9 @@ static void VADDC0h(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -126,9 +110,9 @@ static void VADDC1h(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -144,9 +128,9 @@ static void VADDC2h(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -162,9 +146,9 @@ static void VADDC3h(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -180,9 +164,9 @@ static void VADDC0w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -198,9 +182,9 @@ static void VADDC1w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -216,9 +200,9 @@ static void VADDC2w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -234,9 +218,9 @@ static void VADDC3w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -252,9 +236,9 @@ static void VADDC4w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -270,9 +254,9 @@ static void VADDC5w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -288,9 +272,9 @@ static void VADDC6w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}
@ -306,9 +290,9 @@ static void VADDC7w(void)
(unsigned short)(VR[vs][i])
+ (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_co();
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
/*
 * VAND:  per-lane bitwise AND of VS with the VT operand.
 *
 * The N lane results are staged in ACC_R() and then copied to ACC_W() in a
 * second pass, so every source lane is read before any write-back happens.
 *
 * `vd`, `vt` and `e` are not named directly in this body; presumably the
 * VR_T()/ACC_W() macros consume them -- confirm against vu.h.
 */
static void VAND(int vd, int vs, int vt, int e)
{
    int i;

    for (i = 0; i < N; i++)
    {
        ACC_R(i) = VR_T(i) & VR[vs][i];
    }
    for (i = 0; i < N; i++)
    {
        ACC_W(i) = ACC_R(i);
    }
}
static void VAND_v(void)
{
register int i;
@ -19,9 +8,9 @@ static void VAND_v(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][i];
ACC_L(i) = VR[vs][i] & VR[vt][i];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND0q(void)
@ -32,9 +21,9 @@ static void VAND0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND1q(void)
@ -45,9 +34,9 @@ static void VAND1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND0h(void)
@ -58,9 +47,9 @@ static void VAND0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND1h(void)
@ -71,9 +60,9 @@ static void VAND1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND2h(void)
@ -84,9 +73,9 @@ static void VAND2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND3h(void)
@ -97,9 +86,9 @@ static void VAND3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND0w(void)
@ -110,9 +99,9 @@ static void VAND0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND1w(void)
@ -123,9 +112,9 @@ static void VAND1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND2w(void)
@ -136,9 +125,9 @@ static void VAND2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND3w(void)
@ -149,9 +138,9 @@ static void VAND3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND4w(void)
@ -162,9 +151,9 @@ static void VAND4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND5w(void)
@ -175,9 +164,9 @@ static void VAND5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND6w(void)
@ -188,9 +177,9 @@ static void VAND6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VAND7w(void)
@ -201,8 +190,8 @@ static void VAND7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,45 +1,5 @@
#include "vu.h"
/*
 * VCH:  per-lane select between VS and +/-VT, rebuilding VCO, VCC and VCE.
 *
 * Lanes whose VS and VT signs differ (sn) test VS against -VT; lanes whose
 * signs match test VS against VT.  For lane i the flag bits written are:
 *     VCO bit i = sn          VCO bit (i + 8) = neq
 *     VCC bit i = le          VCC bit (i + 8) = ge
 *     VCE bit i = (VS + VT == -1), which can only be set when sn is set
 * The selected value is staged in ACC_R(i); a final pass copies ACC_R to
 * ACC_W.
 *
 * `vd`, `vt` and `e` are not named directly in this body; presumably the
 * VR_T()/ACC_W() macros consume them -- confirm against vu.h.
 */
static void VCH(int vd, int vs, int vt, int e)
{
int ge, le, neq;
register int i;
/* All three flag registers are rebuilt from zero by this operation. */
VCO = 0x0000;
VCC = 0x0000;
VCE = 0x00;
for (i = 0; i < N; i++)
{
const signed short VS = VR[vs][i];
const signed short VT = VR_T(i);
const int sn = (VS ^ VT) < 0; /* sn = (unsigned short)(VS ^ VT) >> 15 */
if (sn)
{
/* Opposite signs:  compare VS with -VT by looking at VS + VT. */
ge = (VT < 0);
le = (VS + VT <= 0);
neq = (VS + VT == -1); /* compare extension */
VCE |= neq << i;
/* After this XOR, neq is set only when VS + VT is neither 0 nor -1. */
neq ^= !(VS + VT == 0); /* !(x | y) = x ^ !(y), if (x & y) != 1 */
/* le is still an unshifted 0/1 here; it is shifted only at the VCC update. */
ACC_R(i) = le ? -VT : VS;
/* neq is shifted in place to bit (i + 8); it is not read again this lane. */
VCO |= (neq <<= (i + 0x8)) | (sn << (i + 0x0)); /* sn = 1 */
}
else
{
/* Matching signs:  compare VS with VT by looking at VS - VT. */
le = (VT < 0);
ge = (VS - VT >= 0);
neq = !(VS - VT == 0);
/* No-op, kept to mirror the VCE update of the branch above. */
VCE |= 0x00 << i;
ACC_R(i) = ge ? VT : VS;
VCO |= (neq <<= (i + 0x8)) | (sn << (i + 0x0)); /* sn = 0 */
}
/* ge and le are shifted in place; both are reassigned on the next lane. */
VCC |= (ge <<= (i + 0x8)) | (le <<= (i + 0x0));
}
/* Write-back runs as its own pass, after every lane has been evaluated. */
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
void do_ch(int vs)
{
int eq[8], neq[8], vce[8];
@ -73,7 +33,7 @@ void do_ch(int vs)
for (i = 0; i < N; i++)
ge[i] = sn[i] ? (VC[i] > 0x0000) : (VR[vs][i] >= VC[i]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (sn[i] ? le[i] : ge[i]) ? VC[i] : VR[vs][i];
ACC_L(i) = (sn[i] ? le[i] : ge[i]) ? VC[i] : VR[vs][i];
VCC = 0x0000;
for (i = 0; i < N; i++)
@ -98,7 +58,7 @@ static void VCH_v(void)
VC[i] = VR[vt][i];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH0q(void)
@ -112,7 +72,7 @@ static void VCH0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH1q(void)
@ -126,7 +86,7 @@ static void VCH1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH0h(void)
@ -140,7 +100,7 @@ static void VCH0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH1h(void)
@ -154,7 +114,7 @@ static void VCH1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH2h(void)
@ -168,7 +128,7 @@ static void VCH2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH3h(void)
@ -182,7 +142,7 @@ static void VCH3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH0w(void)
@ -196,7 +156,7 @@ static void VCH0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH1w(void)
@ -210,7 +170,7 @@ static void VCH1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH2w(void)
@ -224,7 +184,7 @@ static void VCH2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH3w(void)
@ -238,7 +198,7 @@ static void VCH3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH4w(void)
@ -252,7 +212,7 @@ static void VCH4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH5w(void)
@ -266,7 +226,7 @@ static void VCH5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH6w(void)
@ -280,7 +240,7 @@ static void VCH6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCH7w(void)
@ -294,6 +254,6 @@ static void VCH7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_ch(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,53 +1,5 @@
#include "vu.h"
/*
 * VCL:  per-lane select between VS and +/-VT that consumes the flags left
 * in VCO, VCC and VCE, then rewrites VCC.
 *
 * For lane i:
 *     sn = VCO bit i,  eq = complement of VCO bit (i + 8)
 *     le / ge start out as the old VCC bits i / (i + 8), still in place as
 *     masks (zero or a single set bit), and are recomputed only when eq is
 *     set for the branch chosen by sn.
 * The selected value is staged in ACC_R(i); a final pass copies ACC_R to
 * ACC_W.  VCO and VCE are cleared on exit.
 *
 * `vd`, `vt` and `e` are not named directly in this body; presumably the
 * VR_T()/ACC_W() macros consume them -- confirm against vu.h.
 */
static void VCL(int vd, int vs, int vt, int e)
{
/* Snapshot of VCC, because VCC itself is rebuilt inside the loop. */
register const unsigned short VCC_old = VCC;
int ge, le;
register int i;
VCC = 0x0000; /* Undergo the correction phase, factoring old VCC bits. */
for (i = 0; i < N; i++)
{
/* Operands are handled as unsigned 16-bit values in this function. */
const unsigned short VS = (unsigned short)VR[vs][i];
const unsigned short VT = (unsigned short)VR_T(i);
const int eq = (~VCO >> (i + 0x8)) & 0x0001; /* !(NOTEQUAL) */
const int sn = (VCO >> (i + 0x0)) & 0x0001; /* CARRY */
/* Defaults: inherit the previous VCC bits, already at their final positions. */
le = VCC_old & (0x0001 << i); /* unless (eq & sn) */
ge = VCC_old & (0x0100 << i); /* unless (eq & !sn) */
if (sn)
{
if (eq)
{
/* Sum is formed in int, so a carry out of bit 15 stays visible. */
const int sum = VS + VT;
const int ce = (VCE >> i) & 0x01;
/* lz:  low 16 bits of the sum are all zero. */
int lz = ((sum & 0x0000FFFF) == 0x00000000);
int uz = ((sum & 0xFFFF0000) == 0x00000000); /* !carryout */
/* ce clear: need both lz and uz;  ce set: either one suffices. */
le = (~ce & (lz & uz)) | (ce & (lz | uz));
le <<= i + 0x0;
}
/* le is a mask here, so any nonzero value selects -VT. */
ACC_R(i) = le ? -VT : VS;
}
else
{
if (eq)
{
/* Operands promote to int, so this tests VS >= VT as unsigned 16-bit values. */
ge = (VS - VT >= 0);
ge <<= i + 0x8;
}
/* ge is a mask here, so any nonzero value selects VT. */
ACC_R(i) = ge ? VT : VS;
}
/* Both values are already positioned at bit i / bit (i + 8). */
VCC |= ge | le;
}
/* Write-back runs as its own pass, after every lane has been evaluated. */
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
VCO = 0x0000;
VCE = 0x00;
return;
}
void do_cl(int vs)
{
int eq[8], vce[8];
@ -105,7 +57,7 @@ void do_cl(int vs)
for (i = 0; i < N; i++)
eq[i] = sn[i] ? le[i] : ge[i];
for (i = 0; i < N; i++)
VACC[i].s[LO] = eq[i] ? VC[i] : VR[vs][i];
ACC_L(i) = eq[i] ? VC[i] : VR[vs][i];
VCC = 0x0000;
for (i = 0; i < N; i++)
@ -126,7 +78,7 @@ static void VCL_v(void)
VC[i] = VR[vt][i];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL0q(void)
@ -140,7 +92,7 @@ static void VCL0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL1q(void)
@ -154,7 +106,7 @@ static void VCL1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL0h(void)
@ -168,7 +120,7 @@ static void VCL0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL1h(void)
@ -182,7 +134,7 @@ static void VCL1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL2h(void)
@ -196,7 +148,7 @@ static void VCL2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL3h(void)
@ -210,7 +162,7 @@ static void VCL3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL0w(void)
@ -224,7 +176,7 @@ static void VCL0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL1w(void)
@ -238,7 +190,7 @@ static void VCL1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL2w(void)
@ -252,7 +204,7 @@ static void VCL2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL3w(void)
@ -266,7 +218,7 @@ static void VCL3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL4w(void)
@ -280,7 +232,7 @@ static void VCL4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL5w(void)
@ -294,7 +246,7 @@ static void VCL5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL6w(void)
@ -308,7 +260,7 @@ static void VCL6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCL7w(void)
@ -322,6 +274,6 @@ static void VCL7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_cl(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,38 +1,5 @@
#include "vu.h"
static void VCR(int vd, int vs, int vt, int e)
{
int ge, le;
register int i;
VCC = 0x0000;
for (i = 0; i < N; i++)
{
const signed short VS = VR[vs][i];
const signed short VT = VR_T(i);
const int sn = (VS ^ VT) < 0; /* sn = (unsigned short)(VS ^ VT) >> 15 */
if (sn)
{
ge = (VT < 0); /* -VT > -0; (-VT - 1) > -1; (~VT) >= 0 */
le = (VS + VT + 1 <= 0); /* VS + VT < 0; VS < -VT: "VS <= ~VT" */
ACC_R(i) = le ? ~VT : VS;
}
else
{
le = (VT < 0);
ge = (VS - VT >= 0); /* VS - VT + 1 > 0; VS > VT - 1: "VS >= VT" */
ACC_R(i) = le ? VT : VS;
}
VCC |= (ge <<= (i + 8)) | (le <<= (i + 0));
}
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
VCO = 0x0000;
VCE = 0x00;
return;
}
void do_cr(int vs)
{
int ge[8], le[8];
@ -62,7 +29,7 @@ void do_cr(int vs)
for (i = 0; i < N; i++)
VC[i] ^= sn[i]; /* if (sn == ~0) {VT = ~VT;} else {VT = VT;} */
for (i = 0; i < N; i++)
VACC[i].s[LO] = le[i] ? VC[i] : VR[vs][i];
ACC_L(i) = le[i] ? VC[i] : VR[vs][i];
#if (0)
VCC = 0x0000;
for (i = 0; i < N; i++)
@ -92,7 +59,7 @@ static void VCR_v(void)
VC[i] = VR[vt][i];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR0q(void)
@ -106,7 +73,7 @@ static void VCR0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR1q(void)
@ -120,7 +87,7 @@ static void VCR1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR0h(void)
@ -134,7 +101,7 @@ static void VCR0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR1h(void)
@ -148,7 +115,7 @@ static void VCR1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR2h(void)
@ -162,7 +129,7 @@ static void VCR2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR3h(void)
@ -176,7 +143,7 @@ static void VCR3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR0w(void)
@ -190,7 +157,7 @@ static void VCR0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR1w(void)
@ -204,7 +171,7 @@ static void VCR1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR2w(void)
@ -218,7 +185,7 @@ static void VCR2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR3w(void)
@ -232,7 +199,7 @@ static void VCR3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR4w(void)
@ -246,7 +213,7 @@ static void VCR4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR5w(void)
@ -260,7 +227,7 @@ static void VCR5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR6w(void)
@ -274,7 +241,7 @@ static void VCR6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VCR7w(void)
@ -288,6 +255,6 @@ static void VCR7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_cr(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,29 +1,5 @@
#include "vu.h"
/*
 * VEQ:  per-lane "equal" test of VR[vs] against the VT operand.
 *
 * vs selects the source register in VR.  vd, vt and e are never named in
 * this body; presumably the VR_T / ACC_W macros consume them -- TODO
 * confirm against vu.h.
 *
 * Effects visible here:  VCC is rebuilt from zero with one bit per lane,
 * every ACC_R lane receives the VT operand, ACC_R is then copied into
 * ACC_W, and VCO is cleared before returning.
 */
static void VEQ(int vd, int vs, int vt, int e)
{
int eq; /* equal, unless (NOTEQUAL) */
register unsigned char VCO_VCE;
register int i;
VCC = 0x0000;
/* Complement of bits 8..15 of VCO, held in a single byte. */
VCO_VCE = ~(unsigned char)(VCO >> 8);
for (i = 0; i < N; i++)
{
const signed short VS = VR[vs][i];
const signed short VT = VR_T(i);
/* Bit i of VCO_VCE:  1 when bit (i + 8) of VCO is clear. */
eq = (VCO_VCE >> i) & 0x01;
/* Keep the flag only when both operands hold the same value. */
eq &= (VS == VT);
/* Shift the flag into position i (eq itself is updated) and merge. */
VCC |= eq <<= i;
ACC_R(i) = VT; /* More accurately, `ACC_R(i) = eq ? VS : VT`. */
/* Same value either way:  eq is nonzero only when VS == VT. */
}
/* Second pass:  copy each ACC_R lane into ACC_W. */
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
VCO = 0x0000;
return;
}
void do_eq(int vs)
{
int eq[8];
@ -42,10 +18,10 @@ void do_eq(int vs)
VCC |= 0 << (i + 0x8);
#if (0)
for (i = 0; i < N; i++)
VACC[i].s[LO] = eq[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
ACC_L(i) = eq[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
#else
for (i = 0; i < N; i++)
VACC[i].s[LO] = VC[i];
ACC_L(i) = VC[i];
#endif
return;
}
@ -61,7 +37,7 @@ static void VEQ_v(void)
VC[i] = VR[vt][i];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ0q(void)
@ -75,7 +51,7 @@ static void VEQ0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ1q(void)
@ -89,7 +65,7 @@ static void VEQ1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ0h(void)
@ -103,7 +79,7 @@ static void VEQ0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ1h(void)
@ -117,7 +93,7 @@ static void VEQ1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ2h(void)
@ -131,7 +107,7 @@ static void VEQ2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ3h(void)
@ -145,7 +121,7 @@ static void VEQ3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ0w(void)
@ -159,7 +135,7 @@ static void VEQ0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ1w(void)
@ -173,7 +149,7 @@ static void VEQ1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ2w(void)
@ -187,7 +163,7 @@ static void VEQ2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ3w(void)
@ -201,7 +177,7 @@ static void VEQ3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ4w(void)
@ -215,7 +191,7 @@ static void VEQ4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ5w(void)
@ -229,7 +205,7 @@ static void VEQ5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ6w(void)
@ -243,7 +219,7 @@ static void VEQ6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VEQ7w(void)
@ -257,6 +233,6 @@ static void VEQ7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_eq(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,30 +1,5 @@
#include "vu.h"
/*
 * VGE:  per-lane "greater than or equal" select between VR[vs] and the
 * VT operand.
 *
 * vs selects the source register in VR.  vd, vt and e are never named in
 * this body; presumably the VR_T / ACC_W macros consume them -- TODO
 * confirm against vu.h.
 *
 * A lane passes when VS > VT, or when VS == VT and bits i and (i + 8) of
 * VCO are not both set.  Passing lanes set bit i of VCC and store VS in
 * ACC_R; failing lanes store VT.  ACC_R is then copied into ACC_W and
 * VCO is cleared before returning.
 */
static void VGE(int vd, int vs, int vt, int e)
{
int ge; /* greater than or, unless (CARRY && NOTEQUAL), equal */
register unsigned char VCO_VCE;
register int i;
VCC = 0x0000;
/* Complement of bits 8..15 of VCO, held in a single byte. */
VCO_VCE = ~(unsigned char)(VCO >> 8);
for (i = 0; i < N; i++)
{
const signed short VS = VR[vs][i];
const signed short VT = VR_T(i);
/* 1 when bit i of VCO is clear OR bit (i + 8) of VCO is clear. */
ge = ((~VCO >> i) & 0x0001) | ((VCO_VCE >> i) & 0x01);
/* That only matters for the tie case ... */
ge &= (VS == VT);
/* ... a strictly greater VS always passes. */
ge |= (VS > VT);
/* Shift the flag into position i (ge itself is updated) and merge. */
VCC |= ge <<= i;
/* ge is shifted by now, but is still nonzero exactly when the lane passed. */
ACC_R(i) = ge ? VS : VT;
}
/* Second pass:  copy each ACC_R lane into ACC_W. */
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
VCO = 0x0000;
return;
}
void do_ge(int vs)
{
int ge[8];
@ -50,7 +25,7 @@ void do_ge(int vs)
for (i = 0; i < N; i++)
VCC |= 0 << (i + 0x8);
for (i = 0; i < N; i++)
VACC[i].s[LO] = ge[i] ? VR[vs][i] : VC[i];
ACC_L(i) = ge[i] ? VR[vs][i] : VC[i];
return;
}
@ -65,7 +40,7 @@ static void VGE_v(void)
VC[i] = VR[vt][i];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE0q(void)
@ -79,7 +54,7 @@ static void VGE0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE1q(void)
@ -93,7 +68,7 @@ static void VGE1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE0h(void)
@ -107,7 +82,7 @@ static void VGE0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE1h(void)
@ -121,7 +96,7 @@ static void VGE1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE2h(void)
@ -135,7 +110,7 @@ static void VGE2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE3h(void)
@ -149,7 +124,7 @@ static void VGE3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE0w(void)
@ -163,7 +138,7 @@ static void VGE0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE1w(void)
@ -177,7 +152,7 @@ static void VGE1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE2w(void)
@ -191,7 +166,7 @@ static void VGE2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE3w(void)
@ -205,7 +180,7 @@ static void VGE3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE4w(void)
@ -219,7 +194,7 @@ static void VGE4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE5w(void)
@ -233,7 +208,7 @@ static void VGE5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE6w(void)
@ -247,7 +222,7 @@ static void VGE6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VGE7w(void)
@ -261,6 +236,6 @@ static void VGE7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_ge(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,30 +1,5 @@
#include "vu.h"
/*
 * VLT:  per-lane "less than" select between VR[vs] and the VT operand.
 *
 * vs selects the source register in VR.  vd, vt and e are never named in
 * this body; presumably the VR_T / ACC_W macros consume them -- TODO
 * confirm against vu.h.
 *
 * A lane passes when VS < VT, or when VS == VT and bits i and (i + 8) of
 * VCO are both set.  Passing lanes set bit i of VCC and store VS in
 * ACC_R; failing lanes store VT.  ACC_R is then copied into ACC_W and
 * VCO is cleared before returning.
 */
static void VLT(int vd, int vs, int vt, int e)
{
int lt; /* less than, or if (CARRY && NOTEQUAL), equal */
register unsigned char VCO_VCE;
register int i;
VCC = 0x0000;
/* Complement of bits 8..15 of VCO, held in a single byte. */
VCO_VCE = ~(unsigned char)(VCO >> 8);
for (i = 0; i < N; i++)
{
const signed short VS = VR[vs][i];
const signed short VT = VR_T(i);
/*
 * ~VCO_VCE undoes the complement above, so this is
 * (bit i of VCO) AND (bit (i + 8) of VCO).
 */
lt = ((VCO >> i) & 0x0001) & ((~VCO_VCE >> i) & 0x01);
/* That only matters for the tie case ... */
lt &= (VS == VT);
/* ... a strictly smaller VS always passes. */
lt |= (VS < VT);
/* Shift the flag into position i (lt itself is updated) and merge. */
VCC |= lt <<= i;
/* lt is shifted by now, but is still nonzero exactly when the lane passed. */
ACC_R(i) = lt ? VS : VT;
}
/* Second pass:  copy each ACC_R lane into ACC_W. */
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
VCO = 0x0000;
return;
}
void do_lt(int vs)
{
int lt[8];
@ -49,7 +24,7 @@ void do_lt(int vs)
for (i = 0; i < N; i++)
VCC |= 0 << (i + 0x8);
for (i = 0; i < N; i++)
VACC[i].s[LO] = lt[i] ? VR[vs][i] : VC[i];
ACC_L(i) = lt[i] ? VR[vs][i] : VC[i];
return;
}
@ -64,7 +39,7 @@ static void VLT_v(void)
VC[i] = VR[vt][i];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT0q(void)
@ -78,7 +53,7 @@ static void VLT0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT1q(void)
@ -92,7 +67,7 @@ static void VLT1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT0h(void)
@ -106,7 +81,7 @@ static void VLT0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT1h(void)
@ -120,7 +95,7 @@ static void VLT1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT2h(void)
@ -134,7 +109,7 @@ static void VLT2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT3h(void)
@ -148,7 +123,7 @@ static void VLT3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT0w(void)
@ -162,7 +137,7 @@ static void VLT0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT1w(void)
@ -176,7 +151,7 @@ static void VLT1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT2w(void)
@ -190,7 +165,7 @@ static void VLT2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT3w(void)
@ -204,7 +179,7 @@ static void VLT3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT4w(void)
@ -218,7 +193,7 @@ static void VLT4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT5w(void)
@ -232,7 +207,7 @@ static void VLT5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT6w(void)
@ -246,7 +221,7 @@ static void VLT6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VLT7w(void)
@ -260,6 +235,6 @@ static void VLT7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_lt(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,192 +1,205 @@
#include "vu.h"
static void VMACF(int vd, int vs, int vt, int e)
INLINE void do_macf(short* VD, short* VS, short* VT)
{
INT64 acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW += VR[vs][i]*VR_T(i) << 1;
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
acc[i] = (VS[i]*VT[i]) << 1;
do_acc(acc);
SIGNED_CLAMP(VD, SM_MUL_X);
return;
}
static void VMACF_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
do_macf(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMACF0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x2 & 01) + (i & 0xE)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x3 & 01) + (i & 0xE)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x4 & 03) + (i & 0xC)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x5 & 03) + (i & 0xC)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x6 & 03) + (i & 0xC)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x7 & 03) + (i & 0xC)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x8 & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x9 & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}
static void VMACF7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_macf(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,31 +1,7 @@
#include "vu.h"
/*
* Note about VMACQ.
*
* Current implementation of VMACQ is experimental.
* It is the surviving op-code of the MPEG-DCT-designated RSP circuitries.
* As such, for it to not be omitted, it is heavily modified from the actual.
*
* It was changed into this newer, archaic algorithm on the hardware.
* Could not find any games using VMACQ, so I gave up waiting for an error
* complaining to me that I should implement it. The below algorithm is in
* conformance to the suggested, explained mode of operation.
*/
static void VMACQ(void)
{
register int i;
const int vd = inst.R.sa;
message("VMACQ", 2); /* untested, any N64 ROMs use this?? */
for (i = 0; i < N; i++)
if (VACC[i].DW & (32 << 16)) /* Bit 21 of acc. must be nonzero. */
continue; /* VACC[i].DW += 0x000000000000; */
else
VACC[i].DW += (VACC[i].s[HI] & 0x8000) ? +32 << 16 : -32 << 16;
for (i = 0; i < N; i++) /* Sign-extend 48-bit to 64-bit supersets. */
VACC[i].HW[03] = (signed short)(VACC[i].s[HI]) >> 15;
SIGNED_CLAMP(VMUL_PTR, SM_MUL_Q);
message("VMACQ\nUnimplemented.", 3); /* untested, any N64 ROMs use this?? */
return;
}

View File

@ -1,6 +1,6 @@
#include "vu.h"
void UNSIGNED_CLAMP(int vd)
INLINE void UNSIGNED_CLAMP(short* VD)
{
register int i;
@ -9,202 +9,215 @@ void UNSIGNED_CLAMP(int vd)
register signed short result;
register short int tmp;
result = VACC[i].s[MD]; /* raw slice before clamping */
tmp = (signed short)(VACC[i].DW >> 31) != 0x0000;
result = ACC_M(i); /* raw slice before clamping */
tmp = (((ACC_H(i) << 1) | !!(ACC_M(i) & 0x8000)) != 0x0000);
result |= -tmp; /* slice overflow */
tmp = VACC[i].s[HI] >> 15; /* Zero- or one-extend. */
tmp = ACC_H(i) >> 15; /* Zero- or one-extend. */
result &= ~tmp; /* slice underflow */
VR[vd][i] = result;
VD[i] = result;
}
}
static void VMACU(int vd, int vs, int vt, int e)
INLINE void do_macu(short* VD, short* VS, short* VT)
{
INT64 acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW += VR[vs][i]*VR_T(i) << 1;
UNSIGNED_CLAMP(vd);
acc[i] = (VS[i]*VT[i]) << 1;
do_acc(acc);
UNSIGNED_CLAMP(VD);
return;
}
static void VMACU_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][i];
UNSIGNED_CLAMP(vd);
do_macu(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMACU0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x2 & 01) + (i & 0xE)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x3 & 01) + (i & 0xE)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x4 & 03) + (i & 0xC)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x5 & 03) + (i & 0xC)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x6 & 03) + (i & 0xC)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x7 & 03) + (i & 0xC)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x8 & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0x9 & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}
static void VMACU7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW += 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)];
UNSIGNED_CLAMP(vd);
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_macu(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,226 +1,207 @@
#include "vu.h"
static void VMADH(int vd, int vs, int vt, int e)
INLINE void do_madh(short* VD, signed short* VS, signed short* VT)
{
register signed long long product;
INT64 acc[N];
register int i;
for (i = 0; i < N; i++)
{
product = VR[vs][i] * VR_T(i);
VACC[i].DW += product << 16;
}
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
acc[i] = VS[i] * VT[i];
for (i = 0; i < N; i++)
acc[i] = acc[i] << 16;
do_acc(acc);
SIGNED_CLAMP(VD, SM_MUL_X);
return;
}
static void VMADH_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][i];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
do_madh(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMADH0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0xE & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}
static void VMADH7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * VR[vt][(0xF & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += (INT64)(result[i]) << 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_madh(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,256 +1,207 @@
#include "vu.h"
static void VMADL(int vd, int vs, int vt, int e)
INLINE void do_madl(short* VD, unsigned short* VS, unsigned short* VT)
{
register unsigned int product;
INT64 acc[N];
register int i;
for (i = 0; i < N; i++)
{
product = (unsigned short)VR[vs][i] * (unsigned short)VR_T(i);
VACC[i].DW += product >> 16;
}
SIGNED_CLAMP(VMUL_PTR, SM_MUL_Z);
acc[i] = VS[i] * VT[i];
for (i = 0; i < N; i++)
acc[i] = acc[i] >> 16;
do_acc(acc);
SIGNED_CLAMP(VD, SM_MUL_Z);
return;
}
static void VMADL_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][i]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
do_madl(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMADL0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}
static void VMADL7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
MUDL_acc[i].W =
(unsigned short)(VR[vs][i])
* (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += MUDL_acc[i].H[1];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_madl(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,250 +1,205 @@
#include "vu.h"
static void VMADM(int vd, int vs, int vt, int e)
INLINE void do_madm(short* VD, signed short* VS, unsigned short* VT)
{
INT64 acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW += VR[vs][i] * (unsigned short)VR_T(i);
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
acc[i] = VS[i] * VT[i];
do_acc(acc);
SIGNED_CLAMP(VD, SM_MUL_X);
return;
}
static void VMADM_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = VR[vs][i] * (unsigned short)(VR[vt][i]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
do_madm(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMADM0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}
static void VMADM7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
VR[vs][i]
* (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_madm(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,250 +1,205 @@
#include "vu.h"
static void VMADN(int vd, int vs, int vt, int e)
INLINE void do_madn(short* VD, unsigned short* VS, signed short* VT)
{
INT64 acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW += (unsigned short)VR[vs][i] * VR_T(i);
SIGNED_CLAMP(VMUL_PTR, SM_MUL_Z);
acc[i] = VS[i] * VT[i];
do_acc(acc);
SIGNED_CLAMP(VD, SM_MUL_Z);
return;
}
static void VMADN_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] = (unsigned short)(VR[vs][i]) * VR[vt][i];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
do_madn(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMADN0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0xE & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}
static void VMADN7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
result[i] =
(unsigned short)(VR[vs][i])
* VR[vt][(0xF & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW += result[i];
SIGNED_CLAMP(VR[vd], SM_MUL_Z);
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_madn(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
static void VMOV(int vd, int de, int vt, int e)
{
register int i;
/* MovIn = (int)VR[vt][e & 07]; */
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR_T(i);
VR_D(de &= 07) = VACC[de].s[LO];
return;
}
static void VMOVv0(void)
{
register int i;
@ -19,8 +8,8 @@ static void VMOVv0(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = VACC[00].s[LO];
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = ACC_L(00);
return;
}
static void VMOVv1(void)
@ -31,8 +20,8 @@ static void VMOVv1(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = VACC[01].s[LO];
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = ACC_L(01);
return;
}
static void VMOV0q(void)
@ -43,8 +32,8 @@ static void VMOV0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = VACC[02].s[LO];
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = ACC_L(02);
return;
}
static void VMOV1q(void)
@ -55,8 +44,8 @@ static void VMOV1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = VACC[03].s[LO];
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = ACC_L(03);
return;
}
static void VMOV0h(void)
@ -67,8 +56,8 @@ static void VMOV0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = VACC[04].s[LO];
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = ACC_L(04);
return;
}
static void VMOV1h(void)
@ -79,8 +68,8 @@ static void VMOV1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = VACC[05].s[LO];
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = ACC_L(05);
return;
}
static void VMOV2h(void)
@ -91,8 +80,8 @@ static void VMOV2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = VACC[06].s[LO];
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = ACC_L(06);
return;
}
static void VMOV3h(void)
@ -103,8 +92,8 @@ static void VMOV3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = VACC[07].s[LO];
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = ACC_L(07);
return;
}
static void VMOV0w(void)
@ -115,8 +104,8 @@ static void VMOV0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[00].s[LO];
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(00);
return;
}
static void VMOV1w(void)
@ -127,8 +116,8 @@ static void VMOV1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[01].s[LO];
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(01);
return;
}
static void VMOV2w(void)
@ -139,8 +128,8 @@ static void VMOV2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[02].s[LO];
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(02);
return;
}
static void VMOV3w(void)
@ -151,8 +140,8 @@ static void VMOV3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[03].s[LO];
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(03);
return;
}
static void VMOV4w(void)
@ -163,8 +152,8 @@ static void VMOV4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[04].s[LO];
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(04);
return;
}
static void VMOV5w(void)
@ -175,8 +164,8 @@ static void VMOV5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[05].s[LO];
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(05);
return;
}
static void VMOV6w(void)
@ -187,8 +176,8 @@ static void VMOV6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[06].s[LO];
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(06);
return;
}
static void VMOV7w(void)
@ -199,7 +188,7 @@ static void VMOV7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = VACC[07].s[LO];
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = ACC_L(07);
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
static void VMRG(int vd, int vs, int vt, int e)
{
register int i;
for (i = 0; i < N; i++)
ACC_R(i) = VCC & (0x0001 << i) ? VR[vs][i] : VR_T(i);
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
void do_mrg(void)
{
int cmp[8];
@ -34,9 +23,9 @@ static void VMRG_v(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][i];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][i];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG0q(void)
@ -48,9 +37,9 @@ static void VMRG0q(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x2 & 01) + (i & 0xE)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG1q(void)
@ -62,9 +51,9 @@ static void VMRG1q(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x3 & 01) + (i & 0xE)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG0h(void)
@ -76,9 +65,9 @@ static void VMRG0h(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x4 & 03) + (i & 0xC)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG1h(void)
@ -90,9 +79,9 @@ static void VMRG1h(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x5 & 03) + (i & 0xC)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG2h(void)
@ -104,9 +93,9 @@ static void VMRG2h(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x6 & 03) + (i & 0xC)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG3h(void)
@ -118,9 +107,9 @@ static void VMRG3h(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x7 & 03) + (i & 0xC)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG0w(void)
@ -132,9 +121,9 @@ static void VMRG0w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x8 & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG1w(void)
@ -146,9 +135,9 @@ static void VMRG1w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0x9 & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG2w(void)
@ -160,9 +149,9 @@ static void VMRG2w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xA & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG3w(void)
@ -174,9 +163,9 @@ static void VMRG3w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xB & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG4w(void)
@ -188,9 +177,9 @@ static void VMRG4w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xC & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG5w(void)
@ -202,9 +191,9 @@ static void VMRG5w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xD & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG6w(void)
@ -216,9 +205,9 @@ static void VMRG6w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xE & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xE & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VMRG7w(void)
@ -230,8 +219,8 @@ static void VMRG7w(void)
do_mrg();
for (i = 0; i < N; i++)
VACC[i].s[LO] = result[i] ? VR[vs][i] : VR[vt][(0xF & 07) + (i & 0x0)];
ACC_L(i) = result[i] ? VR[vs][i] : VR[vt][(0xF & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,225 +1,210 @@
#include "vu.h"
static void VMUDH(int vd, int vs, int vt, int e)
INLINE void do_mudh(short* VD, signed short* VS, signed short* VT)
{
long acc[N];
register int i;
for (i = 0; i < N; i++)
{
VACC[i].DW = VR[vs][i] * VR_T(i);
VACC[i].DW <<= 16;
}
SIGNED_CLAMP(VMUL_PTR, SM_MUL_X);
acc[i] = VS[i] * VT[i];
for (i = 0; i < N; i++)
ACC_H(i) = (acc[i] >> 16);
for (i = 0; i < N; i++)
ACC_M(i) = (short)(acc[i]);
for (i = 0; i < N; i++)
ACC_L(i) = 0x0000;
SIGNED_CLAMP(VD, SM_MUL_X);
return;
}
static void VMUDH_v(void)
{
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][i];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
do_mudh(VR[vd], VR[vs], VR[vt]);
return;
}
static void VMUDH0q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH1q(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH0h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH1h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH2h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH3h(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH0w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH1w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
static void VMUDH5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VACC[i].DW <<= 16;
SIGNED_CLAMP(VR[vd], SM_MUL_X);
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_mudh(VR[vd], VR[vs], SV);
return;
}
/* VMUDH 6w: every lane of the VT operand is VR[vt][6], staged in SV for do_mudh(). */
static void VMUDH6w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
    do_mudh(VR[vd], VR[vs], SV);
    return;
}
/* VMUDH 7w: every lane of the VT operand is VR[vt][7], staged in SV for do_mudh(). */
static void VMUDH7w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
    do_mudh(VR[vd], VR[vs], SV);
    return;
}

View File

@ -1,272 +1,213 @@
#include "vu.h"
static void VMUDL(int vd, int vs, int vt, int e)
INLINE void do_mudl(short* VD, unsigned short* VS, unsigned short* VT)
{
register unsigned int product;
long acc[N];
register int i;
for (i = 0; i < N; i++)
{
product = (unsigned short)VR[vs][i] * (unsigned short)VR_T(i);
VACC[i].DW = product >> 16;
}
for (i = 0; i < N; i++) /* Sign-clamp bits 15..0 of ACC to dest. VR. */
VR_D(i) = VACC[i].s[LO]; /* No arithmetic checks needed. */
acc[i] = VS[i] * VT[i];
for (i = 0; i < N; i++)
acc[i] = acc[i] >> 16;
for (i = 0; i < N; i++)
ACC_H(i) = 0x0000;
for (i = 0; i < N; i++)
ACC_M(i) = 0x0000;
for (i = 0; i < N; i++)
ACC_L(i) = acc[i];
for (i = 0; i < N; i++)
VD[i] = ACC_L(i); /* no possibilities to clamp */
return;
}
/*
 * VMUDL, full-vector form:  VR[vs] and VR[vt] go to do_mudl() lane for lane.
 * The casts match do_mudl()'s unsigned-lane parameters.
 */
static void VMUDL_v(void)
{
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    do_mudl(VR[vd], (unsigned short *)VR[vs], (unsigned short *)VR[vt]);
    return;
}
/* VMUDL 0q: lane i of the VT operand is VR[vt][0 + (i & 0xE)], staged in SV for do_mudl(). */
static void VMUDL0q(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 1q: lane i of the VT operand is VR[vt][1 + (i & 0xE)], staged in SV for do_mudl(). */
static void VMUDL1q(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 0h: lane i of the VT operand is VR[vt][0 + (i & 0xC)], staged in SV for do_mudl(). */
static void VMUDL0h(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 1h: lane i of the VT operand is VR[vt][1 + (i & 0xC)], staged in SV for do_mudl(). */
static void VMUDL1h(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 2h: lane i of the VT operand is VR[vt][2 + (i & 0xC)], staged in SV for do_mudl(). */
static void VMUDL2h(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 3h: lane i of the VT operand is VR[vt][3 + (i & 0xC)], staged in SV for do_mudl(). */
static void VMUDL3h(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 0w: every lane of the VT operand is VR[vt][0], staged in SV for do_mudl(). */
static void VMUDL0w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 1w: every lane of the VT operand is VR[vt][1], staged in SV for do_mudl(). */
static void VMUDL1w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 2w: every lane of the VT operand is VR[vt][2], staged in SV for do_mudl(). */
static void VMUDL2w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 3w: every lane of the VT operand is VR[vt][3], staged in SV for do_mudl(). */
static void VMUDL3w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 4w: every lane of the VT operand is VR[vt][4], staged in SV for do_mudl(). */
static void VMUDL4w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 5w: every lane of the VT operand is VR[vt][5], staged in SV for do_mudl(). */
static void VMUDL5w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 6w: every lane of the VT operand is VR[vt][6], staged in SV for do_mudl(). */
static void VMUDL6w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}
/* VMUDL 7w: every lane of the VT operand is VR[vt][7], staged in SV for do_mudl(). */
static void VMUDL7w(void)
{
    unsigned short SV[N]; /* do_mudl() takes unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
    do_mudl(VR[vd], (unsigned short *)VR[vs], SV);
    return;
}

View File

@ -1,208 +1,211 @@
#include "vu.h"
static void VMUDM(int vd, int vs, int vt, int e)
INLINE void do_mudm(short* VD, signed short* VS, unsigned short* VT)
{
long acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW = VR[vs][i] * (unsigned short)VR_T(i);
for (i = 0; i < N; i++) /* Sign-clamp bits 31..16 of ACC to dest. VR. */
VR_D(i) = VACC[i].s[MD]; /* No saturate checks needed. */
acc[i] = VS[i] * VT[i];
for (i = 0; i < N; i++)
ACC_H(i) = VS[i] >> 15;
for (i = 0; i < N; i++)
ACC_M(i) = (acc[i] >> 16);
for (i = 0; i < N; i++)
ACC_L(i) = acc[i];
for (i = 0; i < N; i++)
VD[i] = ACC_M(i); /* no possibilities to clamp */
return;
}
/*
 * VMUDM, full-vector form:  VR[vs] and VR[vt] go to do_mudm() lane for lane.
 * The cast matches do_mudm()'s unsigned VT parameter.
 */
static void VMUDM_v(void)
{
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    do_mudm(VR[vd], VR[vs], (unsigned short *)VR[vt]);
    return;
}
/* VMUDM 0q: lane i of the VT operand is VR[vt][0 + (i & 0xE)], staged in SV for do_mudm(). */
static void VMUDM0q(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 1q: lane i of the VT operand is VR[vt][1 + (i & 0xE)], staged in SV for do_mudm(). */
static void VMUDM1q(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 0h: lane i of the VT operand is VR[vt][0 + (i & 0xC)], staged in SV for do_mudm(). */
static void VMUDM0h(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 1h: lane i of the VT operand is VR[vt][1 + (i & 0xC)], staged in SV for do_mudm(). */
static void VMUDM1h(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 2h: lane i of the VT operand is VR[vt][2 + (i & 0xC)], staged in SV for do_mudm(). */
static void VMUDM2h(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 3h: lane i of the VT operand is VR[vt][3 + (i & 0xC)], staged in SV for do_mudm(). */
static void VMUDM3h(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 0w: every lane of the VT operand is VR[vt][0], staged in SV for do_mudm(). */
static void VMUDM0w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 1w: every lane of the VT operand is VR[vt][1], staged in SV for do_mudm(). */
static void VMUDM1w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 2w: every lane of the VT operand is VR[vt][2], staged in SV for do_mudm(). */
static void VMUDM2w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 3w: every lane of the VT operand is VR[vt][3], staged in SV for do_mudm(). */
static void VMUDM3w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 4w: every lane of the VT operand is VR[vt][4], staged in SV for do_mudm(). */
static void VMUDM4w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 5w: every lane of the VT operand is VR[vt][5], staged in SV for do_mudm(). */
static void VMUDM5w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 6w: every lane of the VT operand is VR[vt][6], staged in SV for do_mudm(). */
static void VMUDM6w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}
/* VMUDM 7w: every lane of the VT operand is VR[vt][7], staged in SV for do_mudm(). */
static void VMUDM7w(void)
{
    unsigned short SV[N]; /* do_mudm() takes VT as unsigned lanes */
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
    do_mudm(VR[vd], VR[vs], SV);
    return;
}

View File

@ -1,208 +1,211 @@
#include "vu.h"
static void VMUDN(int vd, int vs, int vt, int e)
INLINE void do_mudn(short* VD, unsigned short* VS, signed short* VT)
{
long acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW = (unsigned short)VR[vs][i] * VR_T(i);
for (i = 0; i < N; i++) /* Sign-clamp bits 15..0 of ACC to dest. VR. */
VR_D(i) = VACC[i].s[LO]; /* No arithmetic checks needed. */
acc[i] = VS[i] * VT[i];
for (i = 0; i < N; i++)
ACC_H(i) = VT[i] >> 15;
for (i = 0; i < N; i++)
ACC_M(i) = (acc[i] >> 16);
for (i = 0; i < N; i++)
ACC_L(i) = acc[i];
for (i = 0; i < N; i++)
VD[i] = ACC_L(i); /* no possibilities to clamp */
return;
}
/*
 * VMUDN, full-vector form:  VR[vs] and VR[vt] go to do_mudn() lane for lane.
 * The cast matches do_mudn()'s unsigned VS parameter.
 */
static void VMUDN_v(void)
{
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    do_mudn(VR[vd], (unsigned short *)VR[vs], VR[vt]);
    return;
}
/* VMUDN 0q: lane i of the VT operand is VR[vt][0 + (i & 0xE)], staged in SV for do_mudn(). */
static void VMUDN0q(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 1q: lane i of the VT operand is VR[vt][1 + (i & 0xE)], staged in SV for do_mudn(). */
static void VMUDN1q(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 0h: lane i of the VT operand is VR[vt][0 + (i & 0xC)], staged in SV for do_mudn(). */
static void VMUDN0h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 1h: lane i of the VT operand is VR[vt][1 + (i & 0xC)], staged in SV for do_mudn(). */
static void VMUDN1h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 2h: lane i of the VT operand is VR[vt][2 + (i & 0xC)], staged in SV for do_mudn(). */
static void VMUDN2h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 3h: lane i of the VT operand is VR[vt][3 + (i & 0xC)], staged in SV for do_mudn(). */
static void VMUDN3h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 0w: every lane of the VT operand is VR[vt][0], staged in SV for do_mudn(). */
static void VMUDN0w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 1w: every lane of the VT operand is VR[vt][1], staged in SV for do_mudn(). */
static void VMUDN1w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 2w: every lane of the VT operand is VR[vt][2], staged in SV for do_mudn(). */
static void VMUDN2w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 3w: every lane of the VT operand is VR[vt][3], staged in SV for do_mudn(). */
static void VMUDN3w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 4w: every lane of the VT operand is VR[vt][4], staged in SV for do_mudn(). */
static void VMUDN4w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 5w: every lane of the VT operand is VR[vt][5], staged in SV for do_mudn(). */
static void VMUDN5w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 6w: every lane of the VT operand is VR[vt][6], staged in SV for do_mudn(). */
static void VMUDN6w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}
/* VMUDN 7w: every lane of the VT operand is VR[vt][7], staged in SV for do_mudn(). */
static void VMUDN7w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
    do_mudn(VR[vd], (unsigned short *)VR[vs], SV); /* VS lanes are unsigned */
    return;
}

View File

@ -1,240 +1,215 @@
#include "vu.h"
static void VMULF(int vd, int vs, int vt, int e)
INLINE void do_mulf(short* VD, short* VS, short* VT)
{
long acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW = (VR[vs][i]*VR_T(i) << 1) + 0x8000;
acc[i] = (VS[i]*VT[i]) << 1;
for (i = 0; i < N; i++)
VR_D(i) = VACC[i].s[MD];
for (i = 0; i < N; i++) /* Only one reachable value can expose overflow. */
VR_D(i) -= !!(VR_D(i) & 0x8000);
acc[i] = acc[i] + 0x8000;
for (i = 0; i < N; i++)
ACC_H(i) = (VS[i] ^ VT[i]) >> 15;
for (i = 0; i < N; i++)
ACC_M(i) = (short)(acc[i] >> 16);
for (i = 0; i < N; i++)
ACC_L(i) = (short)(acc[i] >> 0);
for (i = 0; i < N; i++)
VD[i] = ACC_M(i);
for (i = 0; i < N; i++)
VD[i] = VD[i] - !!(VD[i] & 0x8000); /* only possible product to clamp */
return;
}
/* VMULF, full-vector form:  VR[vs] and VR[vt] go to do_mulf() lane for lane. */
static void VMULF_v(void)
{
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    do_mulf(VR[vd], VR[vs], VR[vt]);
    return;
}
/* VMULF 0q: lane i of the VT operand is VR[vt][0 + (i & 0xE)], staged in SV for do_mulf(). */
static void VMULF0q(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 1q: lane i of the VT operand is VR[vt][1 + (i & 0xE)], staged in SV for do_mulf(). */
static void VMULF1q(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 0h: lane i of the VT operand is VR[vt][0 + (i & 0xC)], staged in SV for do_mulf(). */
static void VMULF0h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 1h: lane i of the VT operand is VR[vt][1 + (i & 0xC)], staged in SV for do_mulf(). */
static void VMULF1h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 2h: lane i of the VT operand is VR[vt][2 + (i & 0xC)], staged in SV for do_mulf(). */
static void VMULF2h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 3h: lane i of the VT operand is VR[vt][3 + (i & 0xC)], staged in SV for do_mulf(). */
static void VMULF3h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 0w: every lane of the VT operand is VR[vt][0], staged in SV for do_mulf(). */
static void VMULF0w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 1w: every lane of the VT operand is VR[vt][1], staged in SV for do_mulf(). */
static void VMULF1w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 2w: every lane of the VT operand is VR[vt][2], staged in SV for do_mulf(). */
static void VMULF2w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 3w: every lane of the VT operand is VR[vt][3], staged in SV for do_mulf(). */
static void VMULF3w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 4w: every lane of the VT operand is VR[vt][4], staged in SV for do_mulf(). */
static void VMULF4w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 5w: every lane of the VT operand is VR[vt][5], staged in SV for do_mulf(). */
static void VMULF5w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 6w: every lane of the VT operand is VR[vt][6], staged in SV for do_mulf(). */
static void VMULF6w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}
/* VMULF 7w: every lane of the VT operand is VR[vt][7], staged in SV for do_mulf(). */
static void VMULF7w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
    do_mulf(VR[vd], VR[vs], SV);
    return;
}

View File

@ -1,272 +1,217 @@
#include "vu.h"
static void VMULU(int vd, int vs, int vt, int e)
INLINE void do_mulu(short* VD, short* VS, short* VT)
{
long acc[N];
register int i;
for (i = 0; i < N; i++)
VACC[i].DW = (VR[vs][i]*VR_T(i) << 1) + 0x8000;
for (i = 0; i < N; i++) /* Zero-clamp bits 31..16 of ACC to dest. VR. */
{
VR_D(i) = VACC[i].s[MD]; /* VD = ACC[31..16] */
VR_D(i) |= VR_D(i) >> 15; /* VD |= -(result == 0x80008000) */
VR_D(i) &= ~VACC[i].HW[03]; /* VD = (ACC < 0) ? 0 : ACC[31..16]; */
}
acc[i] = (VS[i]*VT[i]) << 1;
for (i = 0; i < N; i++)
acc[i] = acc[i] + 0x8000;
for (i = 0; i < N; i++)
ACC_H(i) = (VS[i] ^ VT[i]) >> 15;
for (i = 0; i < N; i++)
ACC_M(i) = (short)(acc[i] >> 16);
for (i = 0; i < N; i++)
ACC_L(i) = (short)(acc[i] >> 0);
for (i = 0; i < N; i++)
VD[i] = ACC_M(i);
for (i = 0; i < N; i++)
VD[i] |= ACC_M(i) >> 15; /* VD |= -(result == 0x000080008000) */
for (i = 0; i < N; i++)
VD[i] &= ~ACC_H(i); /* VD &= -(result >= 0x000000000000) */
return;
}
/* VMULU, full-vector form:  VR[vs] and VR[vt] go to do_mulu() lane for lane. */
static void VMULU_v(void)
{
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    do_mulu(VR[vd], VR[vs], VR[vt]);
    return;
}
/* VMULU 0q: lane i of the VT operand is VR[vt][0 + (i & 0xE)], staged in SV for do_mulu(). */
static void VMULU0q(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 1q: lane i of the VT operand is VR[vt][1 + (i & 0xE)], staged in SV for do_mulu(). */
static void VMULU1q(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 0h: lane i of the VT operand is VR[vt][0 + (i & 0xC)], staged in SV for do_mulu(). */
static void VMULU0h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 1h: lane i of the VT operand is VR[vt][1 + (i & 0xC)], staged in SV for do_mulu(). */
static void VMULU1h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 2h: lane i of the VT operand is VR[vt][2 + (i & 0xC)], staged in SV for do_mulu(). */
static void VMULU2h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 3h: lane i of the VT operand is VR[vt][3 + (i & 0xC)], staged in SV for do_mulu(). */
static void VMULU3h(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 0w: every lane of the VT operand is VR[vt][0], staged in SV for do_mulu(). */
static void VMULU0w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
/* VMULU 1w: every lane of the VT operand is VR[vt][1], staged in SV for do_mulu(). */
static void VMULU1w(void)
{
    short SV[N];
    register int i;
    const int vd = inst.R.sa;
    const int vs = inst.R.rd;
    const int vt = inst.R.rt;

    for (i = 0; i < N; i++)
        SV[i] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
    do_mulu(VR[vd], VR[vs], SV);
    return;
}
static void VMULU2w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xA & 07) + (i & 0x0)] + 0x8000;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
for (i = 0; i < N; i++)
VR[vd][i] |= VACC[i].s[MD] >> 15;
for (i = 0; i < N; i++)
VR[vd][i] &= VACC[i].s[HI] >> 15;
SV[i] = VR[vt][(0xA & 0x7) + (i & 0x0)];
do_mulu(VR[vd], VR[vs], SV);
return;
}
static void VMULU3w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xB & 07) + (i & 0x0)] + 0x8000;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
for (i = 0; i < N; i++)
VR[vd][i] |= VACC[i].s[MD] >> 15;
for (i = 0; i < N; i++)
VR[vd][i] &= VACC[i].s[HI] >> 15;
SV[i] = VR[vt][(0xB & 0x7) + (i & 0x0)];
do_mulu(VR[vd], VR[vs], SV);
return;
}
static void VMULU4w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xC & 07) + (i & 0x0)] + 0x8000;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
for (i = 0; i < N; i++)
VR[vd][i] |= VACC[i].s[MD] >> 15;
for (i = 0; i < N; i++)
VR[vd][i] &= VACC[i].s[HI] >> 15;
SV[i] = VR[vt][(0xC & 0x7) + (i & 0x0)];
do_mulu(VR[vd], VR[vs], SV);
return;
}
static void VMULU5w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xD & 07) + (i & 0x0)] + 0x8000;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
for (i = 0; i < N; i++)
VR[vd][i] |= VACC[i].s[MD] >> 15;
for (i = 0; i < N; i++)
VR[vd][i] &= VACC[i].s[HI] >> 15;
SV[i] = VR[vt][(0xD & 0x7) + (i & 0x0)];
do_mulu(VR[vd], VR[vs], SV);
return;
}
static void VMULU6w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xE & 07) + (i & 0x0)] + 0x8000;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
for (i = 0; i < N; i++)
VR[vd][i] |= VACC[i].s[MD] >> 15;
for (i = 0; i < N; i++)
VR[vd][i] &= VACC[i].s[HI] >> 15;
SV[i] = VR[vt][(0xE & 0x7) + (i & 0x0)];
do_mulu(VR[vd], VR[vs], SV);
return;
}
static void VMULU7w(void)
{
short SV[N];
register int i;
const int vd = inst.R.sa;
const int vs = inst.R.rd;
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].DW = 2*VR[vs][i]*VR[vt][(0xF & 07) + (i & 0x0)] + 0x8000;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
for (i = 0; i < N; i++)
VR[vd][i] |= VACC[i].s[MD] >> 15;
for (i = 0; i < N; i++)
VR[vd][i] &= VACC[i].s[HI] >> 15;
SV[i] = VR[vt][(0xF & 0x7) + (i & 0x0)];
do_mulu(VR[vd], VR[vs], SV);
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
static void VNAND(int vd, int vs, int vt, int e)
{
register int i;
for (i = 0; i < N; i++)
ACC_R(i) = ~(VR[vs][i] & VR_T(i));
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
static void VNAND_v(void)
{
register int i;
@ -19,9 +8,9 @@ static void VNAND_v(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][i]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND0q(void)
@ -32,9 +21,9 @@ static void VNAND0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND1q(void)
@ -45,9 +34,9 @@ static void VNAND1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND0h(void)
@ -58,9 +47,9 @@ static void VNAND0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND1h(void)
@ -71,9 +60,9 @@ static void VNAND1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND2h(void)
@ -84,9 +73,9 @@ static void VNAND2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND3h(void)
@ -97,9 +86,9 @@ static void VNAND3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND0w(void)
@ -110,9 +99,9 @@ static void VNAND0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND1w(void)
@ -123,9 +112,9 @@ static void VNAND1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND2w(void)
@ -136,9 +125,9 @@ static void VNAND2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND3w(void)
@ -149,9 +138,9 @@ static void VNAND3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND4w(void)
@ -162,9 +151,9 @@ static void VNAND4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND5w(void)
@ -175,9 +164,9 @@ static void VNAND5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND6w(void)
@ -188,9 +177,9 @@ static void VNAND6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNAND7w(void)
@ -201,8 +190,8 @@ static void VNAND7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] & VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,29 +1,5 @@
#include "vu.h"
static void VNE(int vd, int vs, int vt, int e)
{
int ne; /* not equal or, unless !(NOTEQUAL), equal */
register unsigned char VCO_VCE;
register int i;
VCC = 0x0000;
VCO_VCE = ~(unsigned char)(VCO >> 8);
for (i = 0; i < N; i++)
{
const signed short VS = VR[vs][i];
const signed short VT = VR_T(i);
ne = (~VCO_VCE >> i) & 0x01;
ne |= (VS != VT);
VCC |= ne <<= i;
ACC_R(i) = VS; /* More accurately, `ACC_R(i) = ne ? VS : VT`. */
}
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
VCO = 0x0000;
return;
}
void do_ne(int vs)
{
int ne[8];
@ -41,10 +17,10 @@ void do_ne(int vs)
VCC |= 0 << (i + 0x8);
#if (0)
for (i = 0; i < N; i++)
VACC[i].s[LO] = ne[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
ACC_L(i) = ne[i] ? VR[vs][i] : VC[i]; /* correct but redundant */
#else
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i];
ACC_L(i) = VR[vs][i];
#endif
return;
}
@ -60,7 +36,7 @@ static void VNE_v(void)
VC[i] = VR[vt][i];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE0q(void)
@ -74,7 +50,7 @@ static void VNE0q(void)
VC[i] = VR[vt][(0x2 & 01) + (i & 0xE)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE1q(void)
@ -88,7 +64,7 @@ static void VNE1q(void)
VC[i] = VR[vt][(0x3 & 01) + (i & 0xE)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE0h(void)
@ -102,7 +78,7 @@ static void VNE0h(void)
VC[i] = VR[vt][(0x4 & 03) + (i & 0xC)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE1h(void)
@ -116,7 +92,7 @@ static void VNE1h(void)
VC[i] = VR[vt][(0x5 & 03) + (i & 0xC)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE2h(void)
@ -130,7 +106,7 @@ static void VNE2h(void)
VC[i] = VR[vt][(0x6 & 03) + (i & 0xC)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE3h(void)
@ -144,7 +120,7 @@ static void VNE3h(void)
VC[i] = VR[vt][(0x7 & 03) + (i & 0xC)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE0w(void)
@ -158,7 +134,7 @@ static void VNE0w(void)
VC[i] = VR[vt][(0x8 & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE1w(void)
@ -172,7 +148,7 @@ static void VNE1w(void)
VC[i] = VR[vt][(0x9 & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE2w(void)
@ -186,7 +162,7 @@ static void VNE2w(void)
VC[i] = VR[vt][(0xA & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE3w(void)
@ -200,7 +176,7 @@ static void VNE3w(void)
VC[i] = VR[vt][(0xB & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE4w(void)
@ -214,7 +190,7 @@ static void VNE4w(void)
VC[i] = VR[vt][(0xC & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE5w(void)
@ -228,7 +204,7 @@ static void VNE5w(void)
VC[i] = VR[vt][(0xD & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE6w(void)
@ -242,7 +218,7 @@ static void VNE6w(void)
VC[i] = VR[vt][(0xE & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNE7w(void)
@ -256,6 +232,6 @@ static void VNE7w(void)
VC[i] = VR[vt][(0xF & 07) + (i & 0x0)];
do_ne(vs);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
static void VNOR(int vd, int vs, int vt, int e)
{
register int i;
for (i = 0; i < N; i++)
ACC_R(i) = ~(VR[vs][i] | VR_T(i));
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
static void VNOR_v(void)
{
register int i;
@ -19,9 +8,9 @@ static void VNOR_v(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][i]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR0q(void)
@ -32,9 +21,9 @@ static void VNOR0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR1q(void)
@ -45,9 +34,9 @@ static void VNOR1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR0h(void)
@ -58,9 +47,9 @@ static void VNOR0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR1h(void)
@ -71,9 +60,9 @@ static void VNOR1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR2h(void)
@ -84,9 +73,9 @@ static void VNOR2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR3h(void)
@ -97,9 +86,9 @@ static void VNOR3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR0w(void)
@ -110,9 +99,9 @@ static void VNOR0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR1w(void)
@ -123,9 +112,9 @@ static void VNOR1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR2w(void)
@ -136,9 +125,9 @@ static void VNOR2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR3w(void)
@ -149,9 +138,9 @@ static void VNOR3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR4w(void)
@ -162,9 +151,9 @@ static void VNOR4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR5w(void)
@ -175,9 +164,9 @@ static void VNOR5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR6w(void)
@ -188,9 +177,9 @@ static void VNOR6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNOR7w(void)
@ -201,8 +190,8 @@ static void VNOR7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
static void VNXOR(int vd, int vs, int vt, int e)
{
register int i;
for (i = 0; i < N; i++)
ACC_R(i) = ~(VR[vs][i] ^ VR_T(i));
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
static void VNXOR_v(void)
{
register int i;
@ -19,9 +8,9 @@ static void VNXOR_v(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][i]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR0q(void)
@ -32,9 +21,9 @@ static void VNXOR0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR1q(void)
@ -45,9 +34,9 @@ static void VNXOR1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR0h(void)
@ -58,9 +47,9 @@ static void VNXOR0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR1h(void)
@ -71,9 +60,9 @@ static void VNXOR1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR2h(void)
@ -84,9 +73,9 @@ static void VNXOR2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR3h(void)
@ -97,9 +86,9 @@ static void VNXOR3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR0w(void)
@ -110,9 +99,9 @@ static void VNXOR0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR1w(void)
@ -123,9 +112,9 @@ static void VNXOR1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR2w(void)
@ -136,9 +125,9 @@ static void VNXOR2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR3w(void)
@ -149,9 +138,9 @@ static void VNXOR3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR4w(void)
@ -162,9 +151,9 @@ static void VNXOR4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR5w(void)
@ -175,9 +164,9 @@ static void VNXOR5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR6w(void)
@ -188,9 +177,9 @@ static void VNXOR6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VNXOR7w(void)
@ -201,8 +190,8 @@ static void VNXOR7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = ~(VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)]);
ACC_L(i) = ~(VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,16 +1,5 @@
#include "vu.h"
static void VOR(int vd, int vs, int vt, int e)
{
register int i;
for (i = 0; i < N; i++)
ACC_R(i) = VR[vs][i] | VR_T(i);
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
static void VOR_v(void)
{
register int i;
@ -19,9 +8,9 @@ static void VOR_v(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][i];
ACC_L(i) = VR[vs][i] | VR[vt][i];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR0q(void)
@ -32,9 +21,9 @@ static void VOR0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR1q(void)
@ -45,9 +34,9 @@ static void VOR1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR0h(void)
@ -58,9 +47,9 @@ static void VOR0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR1h(void)
@ -71,9 +60,9 @@ static void VOR1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR2h(void)
@ -84,9 +73,9 @@ static void VOR2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR3h(void)
@ -97,9 +86,9 @@ static void VOR3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR0w(void)
@ -110,9 +99,9 @@ static void VOR0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR1w(void)
@ -123,9 +112,9 @@ static void VOR1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR2w(void)
@ -136,9 +125,9 @@ static void VOR2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR3w(void)
@ -149,9 +138,9 @@ static void VOR3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR4w(void)
@ -162,9 +151,9 @@ static void VOR4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR5w(void)
@ -175,9 +164,9 @@ static void VOR5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR6w(void)
@ -188,9 +177,9 @@ static void VOR6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0xE & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VOR7w(void)
@ -201,8 +190,8 @@ static void VOR7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] | VR[vt][(0xF & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,43 +1,6 @@
#include "vu.h"
#include "divrom.h"
static void VRCP(int vd, int de, int vt, int e)
{
unsigned int addr;
int data;
int fetch;
int shift = 32;
DivIn = (int)VR[vt][e & 07];
data = DivIn;
if (data < 0)
data = -data;
do
{
--shift;
if (data & (1 << shift))
goto FOUND_MSB;
} while (shift); /* while (shift > 0) or ((shift ^ 31) < 32) */
shift = 16 ^ 31; /* No bits found in (data == 0x00000000), so shift = 16. */
FOUND_MSB:
shift ^= 31; /* Right-to-left shift direction conversion. */
addr = (data << shift) >> 22;
fetch = div_ROM[addr &= 0x000001FF];
shift ^= 31; /* Flipped shift direction back to right-. */
DivOut = (0x40000000 | (fetch << 14)) >> shift;
if (DivIn < 0)
DivOut = ~DivOut;
else if (DivIn == 0) /* corner case: overflow via division by zero */
DivOut = 0x7FFFFFFF;
else if (DivIn == -32768) /* corner case: signed underflow barrier */
DivOut = 0xFFFF0000;
for (addr = 0; addr < N; addr++)
VACC[addr].s[LO] = VR_T(addr);
VR_D(de &= 07) = (short)DivOut;
DPH = 0;
return;
}
void do_rcp(int data)
{
unsigned int addr;
@ -78,7 +41,7 @@ static void VRCPv0(void)
DivIn = (int)VR[vt][00];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -93,7 +56,7 @@ static void VRCPv1(void)
DivIn = (int)VR[vt][01];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -108,7 +71,7 @@ static void VRCP0q(void)
DivIn = (int)VR[vt][02];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -123,7 +86,7 @@ static void VRCP1q(void)
DivIn = (int)VR[vt][03];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -138,7 +101,7 @@ static void VRCP0h(void)
DivIn = (int)VR[vt][04];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -153,7 +116,7 @@ static void VRCP1h(void)
DivIn = (int)VR[vt][05];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -168,7 +131,7 @@ static void VRCP2h(void)
DivIn = (int)VR[vt][06];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -183,7 +146,7 @@ static void VRCP3h(void)
DivIn = (int)VR[vt][07];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -198,7 +161,7 @@ static void VRCP0w(void)
DivIn = (int)VR[vt][00];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -213,7 +176,7 @@ static void VRCP1w(void)
DivIn = (int)VR[vt][01];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -228,7 +191,7 @@ static void VRCP2w(void)
DivIn = (int)VR[vt][02];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -243,7 +206,7 @@ static void VRCP3w(void)
DivIn = (int)VR[vt][03];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -258,7 +221,7 @@ static void VRCP4w(void)
DivIn = (int)VR[vt][04];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -273,7 +236,7 @@ static void VRCP5w(void)
DivIn = (int)VR[vt][05];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -288,7 +251,7 @@ static void VRCP6w(void)
DivIn = (int)VR[vt][06];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -303,7 +266,7 @@ static void VRCP7w(void)
DivIn = (int)VR[vt][07];
do_rcp(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;

View File

@ -1,18 +1,6 @@
#include "vu.h"
#include "divrom.h"
/*
 * VRCPH:  latch the high half of a double-precision reciprocal operand.
 *
 * Element (e & 7) of VR[vt] is placed in the upper 16 bits of DivIn, the low
 * accumulator slice is loaded from VR_T(i) for every lane, the upper 16 bits
 * of the previously computed DivOut are stored through VR_D(de & 7), and DPH
 * is set to 1.
 *
 * NOTE(review):  VR_T and VR_D are macros defined outside this block;
 * presumably they select the (element-decoded) VT source lane and the VD
 * destination lane, which would be why `vd` is not referenced directly here.
 * Confirm against their definitions.
 */
static void VRCPH(int vd, int de, int vt, int e)
{
    register int i;

    /*
     * Multiply rather than `<< 16`:  left-shifting a negative signed value is
     * undefined behavior in C, while short * 65536 always fits a 32-bit int.
     */
    DivIn = VR[vt][e & 07] * 65536;
    for (i = 0; i < N; i++)
        VACC[i].s[LO] = VR_T(i);
    VR_D(de &= 07) = DivOut >> 16; /* store high part */
    DPH = 1;
    return;
}
static void VRCPHv0(void)
{
register int i;
@ -22,7 +10,7 @@ static void VRCPHv0(void)
DivIn = VR[vt][00] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -36,7 +24,7 @@ static void VRCPHv1(void)
DivIn = VR[vt][01] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -50,7 +38,7 @@ static void VRCPH0q(void)
DivIn = VR[vt][02] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -64,7 +52,7 @@ static void VRCPH1q(void)
DivIn = VR[vt][03] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -78,7 +66,7 @@ static void VRCPH0h(void)
DivIn = VR[vt][04] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -92,7 +80,7 @@ static void VRCPH1h(void)
DivIn = VR[vt][05] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -106,7 +94,7 @@ static void VRCPH2h(void)
DivIn = VR[vt][06] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -120,7 +108,7 @@ static void VRCPH3h(void)
DivIn = VR[vt][07] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -134,7 +122,7 @@ static void VRCPH0w(void)
DivIn = VR[vt][00] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -148,7 +136,7 @@ static void VRCPH1w(void)
DivIn = VR[vt][01] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -162,7 +150,7 @@ static void VRCPH2w(void)
DivIn = VR[vt][02] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -176,7 +164,7 @@ static void VRCPH3w(void)
DivIn = VR[vt][03] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -190,7 +178,7 @@ static void VRCPH4w(void)
DivIn = VR[vt][04] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -204,7 +192,7 @@ static void VRCPH5w(void)
DivIn = VR[vt][05] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -218,7 +206,7 @@ static void VRCPH6w(void)
DivIn = VR[vt][06] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -232,7 +220,7 @@ static void VRCPH7w(void)
DivIn = VR[vt][07] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;

View File

@ -1,46 +1,6 @@
#include "vu.h"
#include "divrom.h"
/*
 * VRCPL:  reciprocal, low half.
 *
 * Builds DivIn from element (e & 7) of VR[vt]:  OR-ed into the low 16 bits
 * when DPH is set, otherwise loaded zero-extended.  The magnitude of DivIn is
 * normalized so its most significant set bit sits at bit 31, nine bits below
 * that select an entry of div_ROM, and the fetched mantissa is shifted back
 * to form DivOut.  Afterwards the low accumulator slice is loaded from
 * VR_T(lane), the low 16 bits of DivOut are stored through VR_D(de & 7), and
 * DPH is cleared.
 *
 * NOTE(review):  VR_T and VR_D are macros defined outside this block;
 * presumably they resolve the VT source lane and the VD destination lane
 * (which would explain why `vd` is not referenced directly) -- confirm.
 */
static void VRCPL(int vd, int de, int vt, int e)
{
    unsigned int addr;
    int data;
    int fetch;
    int shift = 32;

    if (DPH)
        DivIn |= (unsigned short)VR[vt][e & 07];
    else
        DivIn = VR[vt][e & 07] & 0x0000FFFF; /* Do not sign-extend. */

    data = DivIn;
    if (data < 0) /* -(x) if >= -32768; ~(x) if below (no INT_MIN overflow) */
        data = (data < -32768) ? ~data : -data;

    /* Scan from bit 31 downward for the most significant set bit. */
    do
    {
        --shift;
        if ((unsigned int)data & (1u << shift)) /* unsigned: 1 << 31 is UB */
            goto FOUND_MSB;
    } while (shift); /* while (shift > 0) or ((shift ^ 31) < 32) */
    shift = 31 - 16*DPH; /* if (data == 0) shift = DPH ? 16 ^ 31 : 0 ^ 31; */
FOUND_MSB:
    shift ^= 31; /* Right-to-left shift direction conversion. */
    /* Shift as unsigned so moving the MSB into bit 31 cannot overflow. */
    addr = ((unsigned int)data << shift) >> 22;
    fetch = div_ROM[addr &= 0x000001FF];
    shift ^= 31; /* Flipped shift direction back to right-. */
    DivOut = (0x40000000 | (fetch << 14)) >> shift;

    /*
     * Corner cases are tested before the generic negative case; with the
     * sign test first, the -32768 branch could never be reached.
     */
    if (DivIn == 0) /* corner case:  overflow via division by zero */
        DivOut = 0x7FFFFFFF;
    else if (DivIn == -32768) /* corner case:  signed underflow barrier */
        DivOut = 0xFFFF0000;
    else if (DivIn < 0)
        DivOut = ~DivOut;

    for (addr = 0; addr < N; addr++)
        VACC[addr].s[LO] = VR_T(addr);
    VR_D(de &= 07) = (short)DivOut;
    DPH = 0;
    return;
}
void do_rcpl(int data)
{
unsigned int addr;
@ -82,7 +42,7 @@ static void VRCPLv0(void)
DivIn |= (unsigned short)VR[vt][00];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x0 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vd][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -98,7 +58,7 @@ static void VRCPLv1(void)
DivIn |= (unsigned short)VR[vt][01];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x1 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vd][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -114,7 +74,7 @@ static void VRCPL0q(void)
DivIn |= (unsigned short)VR[vt][02];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x2 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vd][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -130,7 +90,7 @@ static void VRCPL1q(void)
DivIn |= (unsigned short)VR[vt][03];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x3 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vd][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -146,7 +106,7 @@ static void VRCPL0h(void)
DivIn |= (unsigned short)VR[vt][04];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x4 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vd][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -162,7 +122,7 @@ static void VRCPL1h(void)
DivIn |= (unsigned short)VR[vt][05];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x5 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vd][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -178,7 +138,7 @@ static void VRCPL2h(void)
DivIn |= (unsigned short)VR[vt][06];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x6 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vd][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -194,7 +154,7 @@ static void VRCPL3h(void)
DivIn |= (unsigned short)VR[vt][07];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x7 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vd][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -210,7 +170,7 @@ static void VRCPL0w(void)
DivIn |= (unsigned short)VR[vt][00];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x8 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -226,7 +186,7 @@ static void VRCPL1w(void)
DivIn |= (unsigned short)VR[vt][01];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0x9 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -242,7 +202,7 @@ static void VRCPL2w(void)
DivIn |= (unsigned short)VR[vt][02];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0xA & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -258,7 +218,7 @@ static void VRCPL3w(void)
DivIn |= (unsigned short)VR[vt][03];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0xB & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -274,7 +234,7 @@ static void VRCPL4w(void)
DivIn |= (unsigned short)VR[vt][04];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0xC & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -290,7 +250,7 @@ static void VRCPL5w(void)
DivIn |= (unsigned short)VR[vt][05];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0xD & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -306,7 +266,7 @@ static void VRCPL6w(void)
DivIn |= (unsigned short)VR[vt][06];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0xE & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -322,7 +282,7 @@ static void VRCPL7w(void)
DivIn |= (unsigned short)VR[vt][07];
do_rcpl(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vd][(0xF & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vd][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;

View File

@ -1,18 +1,6 @@
#include "vu.h"
#include "divrom.h"
/*
 * VRSQH:  latch the high half of a double-precision reciprocal-square-root
 * operand.
 *
 * Element (e & 7) of VR[vt] is placed in the upper 16 bits of DivIn, the low
 * accumulator slice is loaded from VR_T(i) for every lane, the upper 16 bits
 * of the previously computed DivOut are stored through VR_D(de & 7), and DPH
 * is set to 1.
 *
 * NOTE(review):  VR_T and VR_D are macros defined outside this block;
 * presumably they select the (element-decoded) VT source lane and the VD
 * destination lane, which would be why `vd` is not referenced directly here.
 * Confirm against their definitions.
 */
static void VRSQH(int vd, int de, int vt, int e)
{
    register int i;

    /*
     * Multiply rather than `<< 16`:  left-shifting a negative signed value is
     * undefined behavior in C, while short * 65536 always fits a 32-bit int.
     */
    DivIn = VR[vt][e & 07] * 65536;
    for (i = 0; i < N; i++)
        VACC[i].s[LO] = VR_T(i);
    VR_D(de &= 07) = DivOut >> 16; /* store high part */
    DPH = 1;
    return;
}
static void VRSQHv0(void)
{
register int i;
@ -22,7 +10,7 @@ static void VRSQHv0(void)
DivIn = VR[vt][00] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -36,7 +24,7 @@ static void VRSQHv1(void)
DivIn = VR[vt][01] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -50,7 +38,7 @@ static void VRSQH0q(void)
DivIn = VR[vt][02] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -64,7 +52,7 @@ static void VRSQH1q(void)
DivIn = VR[vt][03] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -78,7 +66,7 @@ static void VRSQH0h(void)
DivIn = VR[vt][04] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -92,7 +80,7 @@ static void VRSQH1h(void)
DivIn = VR[vt][05] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -106,7 +94,7 @@ static void VRSQH2h(void)
DivIn = VR[vt][06] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -120,7 +108,7 @@ static void VRSQH3h(void)
DivIn = VR[vt][07] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -134,7 +122,7 @@ static void VRSQH0w(void)
DivIn = VR[vt][00] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -148,7 +136,7 @@ static void VRSQH1w(void)
DivIn = VR[vt][01] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -162,7 +150,7 @@ static void VRSQH2w(void)
DivIn = VR[vt][02] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -176,7 +164,7 @@ static void VRSQH3w(void)
DivIn = VR[vt][03] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -190,7 +178,7 @@ static void VRSQH4w(void)
DivIn = VR[vt][04] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -204,7 +192,7 @@ static void VRSQH5w(void)
DivIn = VR[vt][05] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -218,7 +206,7 @@ static void VRSQH6w(void)
DivIn = VR[vt][06] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;
@ -232,7 +220,7 @@ static void VRSQH7w(void)
DivIn = VR[vt][07] << 16;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = DivOut >> 16;
DPH = 1;
return;

View File

@ -1,49 +1,6 @@
#include "vu.h"
#include "divrom.h"
/*
 * VRSQL:  reciprocal square root, low half.
 *
 * Builds DivIn from element (e & 7) of VR[vt]:  OR-ed into the low 16 bits
 * when DPH is set, otherwise loaded zero-extended.  The magnitude of DivIn is
 * normalized so its most significant set bit sits at bit 31; the table index
 * is taken from the bits below it, forced into the upper half of div_ROM
 * (| 0x200) with the parity of the normalizing shift as its lowest bit.  The
 * fetched mantissa is shifted right by half the original bit position to form
 * DivOut.  Afterwards the low accumulator slice is loaded from VR_T(lane),
 * the low 16 bits of DivOut are stored through VR_D(de & 7), and DPH is
 * cleared.
 *
 * NOTE(review):  VR_T and VR_D are macros defined outside this block;
 * presumably they resolve the VT source lane and the VD destination lane
 * (which would explain why `vd` is not referenced directly) -- confirm.
 */
static void VRSQL(int vd, int de, int vt, int e)
{
    unsigned int addr;
    int data;
    int fetch;
    int shift = 32;

    if (DPH)
        DivIn |= (unsigned short)VR[vt][e & 07];
    else
        DivIn = VR[vt][e & 07] & 0x0000FFFF; /* Do not sign-extend. */

    data = DivIn;
    if (data < 0) /* -(x) if >= -32768; ~(x) if below (no INT_MIN overflow) */
        data = (data < -32768) ? ~data : -data;

    /* Scan from bit 31 downward for the most significant set bit. */
    do
    {
        --shift;
        if ((unsigned int)data & (1u << shift)) /* unsigned: 1 << 31 is UB */
            goto FOUND_MSB;
    } while (shift); /* while (shift > 0) or ((shift ^ 31) < 32) */
    shift = 31 - 16*DPH; /* if (data == 0) shift = DPH ? 16 ^ 31 : 0 ^ 31; */
FOUND_MSB:
    shift ^= 31; /* Right-to-left shift direction conversion. */
    /* Shift as unsigned so moving the MSB into bit 31 cannot overflow. */
    addr = ((unsigned int)data << shift) >> 22;
    addr &= 0x000001FE;
    addr |= 0x00000200 | (shift & 1);
    fetch = div_ROM[addr];
    shift ^= 31; /* Flipped shift direction back to right-. */
    shift >>= 1;
    DivOut = (0x40000000 | (fetch << 14)) >> shift;

    /*
     * Corner cases are tested before the generic negative case; with the
     * sign test first, the -32768 branch could never be reached.
     */
    if (DivIn == 0) /* corner case:  overflow via division by zero */
        DivOut = 0x7FFFFFFF;
    else if (DivIn == -32768) /* corner case:  signed underflow barrier */
        DivOut = 0xFFFF0000;
    else if (DivIn < 0)
        DivOut = ~DivOut;

    for (addr = 0; addr < N; addr++)
        VACC[addr].s[LO] = VR_T(addr);
    VR_D(de &= 07) = (short)DivOut;
    DPH = 0;
    return;
}
void do_rsql(int data)
{
unsigned int addr;
@ -88,7 +45,7 @@ static void VRSQLv0(void)
DivIn |= (unsigned short)VR[vt][00];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x0 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x0 & 0x0) + (i & 0x7)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -104,7 +61,7 @@ static void VRSQLv1(void)
DivIn |= (unsigned short)VR[vt][01];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x1 & 0x0) + (i & 0x7)];
ACC_L(i) = VR[vt][(0x1 & 0x0) + (i & 0x7)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -120,7 +77,7 @@ static void VRSQL0q(void)
DivIn |= (unsigned short)VR[vt][02];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x2 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x2 & 0x1) + (i & 0xE)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -136,7 +93,7 @@ static void VRSQL1q(void)
DivIn |= (unsigned short)VR[vt][03];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x3 & 0x1) + (i & 0xE)];
ACC_L(i) = VR[vt][(0x3 & 0x1) + (i & 0xE)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -152,7 +109,7 @@ static void VRSQL0h(void)
DivIn |= (unsigned short)VR[vt][04];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x4 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x4 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -168,7 +125,7 @@ static void VRSQL1h(void)
DivIn |= (unsigned short)VR[vt][05];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x5 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x5 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -184,7 +141,7 @@ static void VRSQL2h(void)
DivIn |= (unsigned short)VR[vt][06];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x6 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x6 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -200,7 +157,7 @@ static void VRSQL3h(void)
DivIn |= (unsigned short)VR[vt][07];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x7 & 0x3) + (i & 0xC)];
ACC_L(i) = VR[vt][(0x7 & 0x3) + (i & 0xC)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -216,7 +173,7 @@ static void VRSQL0w(void)
DivIn |= (unsigned short)VR[vt][00];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x8 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x8 & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -232,7 +189,7 @@ static void VRSQL1w(void)
DivIn |= (unsigned short)VR[vt][01];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0x9 & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0x9 & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -248,7 +205,7 @@ static void VRSQL2w(void)
DivIn |= (unsigned short)VR[vt][02];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xA & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xA & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -264,7 +221,7 @@ static void VRSQL3w(void)
DivIn |= (unsigned short)VR[vt][03];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xB & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xB & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -280,7 +237,7 @@ static void VRSQL4w(void)
DivIn |= (unsigned short)VR[vt][04];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xC & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xC & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -296,7 +253,7 @@ static void VRSQL5w(void)
DivIn |= (unsigned short)VR[vt][05];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xD & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xD & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -312,7 +269,7 @@ static void VRSQL6w(void)
DivIn |= (unsigned short)VR[vt][06];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xE & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xE & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;
@ -328,7 +285,7 @@ static void VRSQL7w(void)
DivIn |= (unsigned short)VR[vt][07];
do_rsql(DivIn);
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vt][(0xF & 0x7) + (i & 0x0)];
ACC_L(i) = VR[vt][(0xF & 0x7) + (i & 0x0)];
VR[vd][de] = (short)DivOut;
DPH = 0;
return;

View File

@ -1,15 +1,16 @@
#include "vu.h"
static void VSAW(int vd, int vs, int vt, int e)
{
register int i;
#ifdef VU_EMULATE_SCALAR_ACCUMULATOR_READ
static void VSAR(int vd, int vs, int vt, int e)
{
short oldval[N];
register int i;
for (i = 0; i < N; i++)
result[i] = VR[vs][i];
#endif
vs = vt = 0;
/* Even though `vt` is ignored in VSAR, according to official sources as well
* as reversing, lots of games seem to specify it as nonzero, possibly to
oldval[i] = VR[vs][i];
vt = 0;
/* Even though VT is ignored in VSAR, according to official sources as well
* as reversing, lots of games seem to specify it as non-zero, possibly to
* avoid register stalling or other VU hazards. Not really certain why yet.
*/
e ^= 0x8;
@ -17,24 +18,20 @@ static void VSAW(int vd, int vs, int vt, int e)
* Currently this code is safer because &= is less likely to catch oddities.
* Either way, documentation shows that the switch range is 0:2, not 8:A.
*/
e = 2 - e;
if (e < 0)
if (e > 2)
{
message("VSAR\nInvalid mask.", 2);
for (i = vs; i < 8; i++)
VR_D(i) = 0x0000; /* override behavior (zilmar) */
for (i = 0; i < N; i++)
VR[vd][i] = 0x0000; /* override behavior (zilmar) */
}
else
for (i = vs; i < 8; i++)
VR_D(i) = VACC[i].s[e];
#ifdef VU_EMULATE_SCALAR_ACCUMULATOR_READ
e ^= 03;
--e;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[e][i];
for (i = 0; i < N; i++)
VACC[i].s[e] = result[i]; /* ... = VR[vs][i]; */
#endif
VACC[e][i] = oldval[i]; /* ... = VS */
return;
}
#endif
static void VSAWH(void)
{
@ -42,7 +39,7 @@ static void VSAWH(void)
const int vd = inst.R.sa;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[HI];
VR[vd][i] = ACC_H(i);
return;
}
static void VSAWM(void)
@ -51,7 +48,7 @@ static void VSAWM(void)
const int vd = inst.R.sa;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[MD];
VR[vd][i] = ACC_M(i);
return;
}
static void VSAWL(void)
@ -60,6 +57,6 @@ static void VSAWL(void)
const int vd = inst.R.sa;
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}

View File

@ -1,23 +1,5 @@
#include "vu.h"
/*
 * VSUB:  per-lane signed subtract with borrow-in.
 *
 * Each lane computes VR[vs][i] - VR_T(i) minus bit i of VCO, after which VCO
 * is cleared entirely.  The low 16 bits of every difference are written to
 * the low accumulator slice, and SIGNED_CLAMP is then invoked to produce the
 * destination value.
 *
 * NOTE(review):  VR_T, VMUL_PTR and SIGNED_CLAMP are defined outside this
 * block; they presumably consume `vd`, `vt`, `e` and `result` implicitly --
 * confirm against their definitions before renaming anything here.
 */
static void VSUB(int vd, int vs, int vt, int e)
{
    register int i;

    /* Lane i borrows from bit i of VCO; the flag word is wiped afterwards. */
    for (i = 0; i < N; i++)
        result[i] = VR[vs][i] - VR_T(i) - (int)((VCO >> i) & 0x0001);
    VCO = 0x0000; /* Clears the borrow bits and the upper NOTEQUAL bits. */

    for (i = 0; i < N; i++)
        VACC[i].s[LO] = (short)result[i];
    SIGNED_CLAMP(VMUL_PTR, SM_ADD_A);
    return;
}
void clr_bi(void) /* clear CARRY and borrow in to accumulators */
{
int bi[8];
@ -41,7 +23,7 @@ static void VSUB_v(void)
result[i] = VR[vs][i] - VR[vt][i];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -56,7 +38,7 @@ static void VSUB0q(void)
result[i] = VR[vs][i] - VR[vt][(0x2 & 01) + (i & 0xE)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -71,7 +53,7 @@ static void VSUB1q(void)
result[i] = VR[vs][i] - VR[vt][(0x3 & 01) + (i & 0xE)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -86,7 +68,7 @@ static void VSUB0h(void)
result[i] = VR[vs][i] - VR[vt][(0x4 & 03) + (i & 0xC)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -101,7 +83,7 @@ static void VSUB1h(void)
result[i] = VR[vs][i] - VR[vt][(0x5 & 03) + (i & 0xC)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -116,7 +98,7 @@ static void VSUB2h(void)
result[i] = VR[vs][i] - VR[vt][(0x6 & 03) + (i & 0xC)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -131,7 +113,7 @@ static void VSUB3h(void)
result[i] = VR[vs][i] - VR[vt][(0x7 & 03) + (i & 0xC)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -146,7 +128,7 @@ static void VSUB0w(void)
result[i] = VR[vs][i] - VR[vt][(0x8 & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -161,7 +143,7 @@ static void VSUB1w(void)
result[i] = VR[vs][i] - VR[vt][(0x9 & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -176,7 +158,7 @@ static void VSUB2w(void)
result[i] = VR[vs][i] - VR[vt][(0xA & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -191,7 +173,7 @@ static void VSUB3w(void)
result[i] = VR[vs][i] - VR[vt][(0xB & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -206,7 +188,7 @@ static void VSUB4w(void)
result[i] = VR[vs][i] - VR[vt][(0xC & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -221,7 +203,7 @@ static void VSUB5w(void)
result[i] = VR[vs][i] - VR[vt][(0xD & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -236,7 +218,7 @@ static void VSUB6w(void)
result[i] = VR[vs][i] - VR[vt][(0xE & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}
@ -251,7 +233,7 @@ static void VSUB7w(void)
result[i] = VR[vs][i] - VR[vt][(0xF & 07) + (i & 0x0)];
clr_bi();
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
SIGNED_CLAMP(VR[vd], SM_ADD_A);
return;
}

View File

@ -1,25 +1,5 @@
#include "vu.h"
/*
 * VSUBC:  per-lane unsigned subtract that produces carry/not-equal flags.
 *
 * VCO is cleared, then each lane computes the difference of the two operands
 * taken as unsigned 16-bit values.  The low 16 bits of the difference go to
 * ACC_R(i).  For every lane whose difference is nonzero, bit (8 + i) of VCO
 * is set, and bit i is additionally set when the difference is negative.
 * Finally ACC_R(i) is copied to ACC_W(i).
 *
 * NOTE(review):  VR_T, ACC_R and ACC_W are macros defined outside this block
 * and may touch the destination register; the loops are therefore kept
 * separate and in their original order.
 */
static void VSUBC(int vd, int vs, int vt, int e)
{
    register int i;

    VCO = 0x0000;
    for (i = 0; i < N; i++)
        result[i] = (unsigned short)VR[vs][i] - (unsigned short)VR_T(i);
    for (i = 0; i < N; i++)
        ACC_R(i) = (short)result[i];

    /* If VS == VT, neither flag is set for that lane. */
    for (i = 0; i < N; i++)
        if (result[i] != 0) /* NOTEQUAL always; CARRY only if VS - VT < 0 */
            VCO |= ((0x01 << 8) | (result[i] < 0)) << i;

    for (i = 0; i < N; i++)
        ACC_W(i) = ACC_R(i);
    return;
}
#if (0)
#define SETBI(i) (result[i] < 0)
#else
@ -62,9 +42,9 @@ static void VSUBC_v(void)
for (i = 0; i < N; i++)
result[i] = (unsigned short)(VR[vs][i]) - (unsigned short)(VR[vt][i]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -80,9 +60,9 @@ static void VSUBC0q(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x2 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -98,9 +78,9 @@ static void VSUBC1q(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x3 & 01) + (i & 0xE)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -116,9 +96,9 @@ static void VSUBC0h(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x4 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -134,9 +114,9 @@ static void VSUBC1h(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x5 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -152,9 +132,9 @@ static void VSUBC2h(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x6 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -170,9 +150,9 @@ static void VSUBC3h(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x7 & 03) + (i & 0xC)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -188,9 +168,9 @@ static void VSUBC0w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x8 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -206,9 +186,9 @@ static void VSUBC1w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0x9 & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -224,9 +204,9 @@ static void VSUBC2w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0xA & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -242,9 +222,9 @@ static void VSUBC3w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0xB & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -260,9 +240,9 @@ static void VSUBC4w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0xC & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -278,9 +258,9 @@ static void VSUBC5w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0xD & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -296,9 +276,9 @@ static void VSUBC6w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0xE & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}
@ -314,9 +294,9 @@ static void VSUBC7w(void)
(unsigned short)(VR[vs][i])
- (unsigned short)(VR[vt][(0xF & 07) + (i & 0x0)]);
for (i = 0; i < N; i++)
VACC[i].s[LO] = (short)(result[i]);
ACC_L(i) = (short)(result[i]);
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
set_bo();
return;
}

119
vu/vu.h
View File

@ -1,7 +1,7 @@
/******************************************************************************\
* Project: MSP Emulation Layer for Vector Unit Computational Operations *
* Authors: Iconoclast *
* Release: 2013.09.11 *
* Release: 2013.09.13 *
* License: none (public domain) *
\******************************************************************************/
#ifndef _VU_H
@ -12,6 +12,8 @@
#define MACHINE_SIZE_48_MIN
#endif
typedef long long INT64;
/*
* vector-scalar element decoding
*
@ -47,6 +49,9 @@ static const int ei[16][8] = {
{ 07, 07, 07, 07, 07, 07, 07, 07 } /* 7 */
};
#define N 8
/* N: number of processor elements in SIMD processor */
/*
* RSP virtual registers (of vector unit)
* The most important are the 32 general-purpose vector registers.
@ -55,8 +60,8 @@ static const int ei[16][8] = {
* For ?WC2 we may need to do byte-precision access just as directly.
* This is amended by using the `VU_S` and `VU_B` macros defined in `rsp.h`.
*/
short VR[32][8];
short VC[8]; /* vector/scalar coefficient */
short VR[32][N];
short VC[N]; /* vector/scalar coefficient */
/* #define EMULATE_VECTOR_RESULT_BUFFER */
/*
@ -120,9 +125,6 @@ int sub_mask[16] = {
0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7, 0x7
};
#define N 8
/* N: number of processor elements in SIMD processor */
void SHUFFLE_VECTOR(int vt, int e)
{
register int i, j;
@ -149,19 +151,15 @@ void SHUFFLE_VECTOR(int vt, int e)
return;
}
typedef long long INT64;
#if (0)
/*
* accumulator-indexing macros
* accumulator-indexing macros (little endian: not suitable for VSAW)
*/
#define LO 00
#define MD 01
#define HI 02
#define HI 02
#define MD 01
#define LO 00
static union ACC {
#ifdef MACHINE_SIZE_48_MIN
signed e: 48; /* There are eight elements in the accumulator. */
#endif
short int s[3]; /* Each element has a low, middle, and high 16-bit slice. */
signed char SB[6];
/* 64-bit access: */
@ -170,27 +168,32 @@ static union ACC {
unsigned short UHW[4];
int W[2];
unsigned int UW[2];
long long int DW;
unsigned long long UDW;
} VACC[8];
INT64 DW;
} VACC[N];
#define ACC_L(i) (VACC[i].s[LO])
#define ACC_M(i) (VACC[i].s[MD])
#define ACC_H(i) (VACC[i].s[HI])
#else
/*
* special macro service for clamping accumulators
*
* Clamping on the RSP is the same as traditional vector units, not just SGI.
* This algorithm, therefore, is public domain material.
*
* In almost all cases, the RSP requests clamping to bits 47..16 of each acc.
* We therefore compare the 32-bit (signed int)(acc >> 16) and clamp it down
* to, usually, 16-bit results (0x8000 if < -32768, 0x7FFF if > +32767).
*
* The exception is VMACQ, which requests a clamp index lsb of >> 17.
* accumulator-indexing macros (inverted access dimensions, suited for SSE)
*/
#define CLAMP_BASE(acc, lo) ((signed int)(VACC[acc].DW >> lo))
#define HI 00
#define MD 01
#define LO 02
short VACC[3][N];
/*
* This algorithm might have a bug if you invoke shifts greater than 16,
* because the 48-bit acc needs to be sign-extended when shifting right here.
* short ACC_L[N];
* short ACC_M[N];
* short ACC_H[N];
*/
#define ACC_L(i) (VACC[LO][i])
#define ACC_M(i) (VACC[MD][i])
#define ACC_H(i) (VACC[HI][i])
#endif
#define FORCE_STATIC_CLAMP
static signed short sclamp[2][2] = {
{ 0x0000, -0x8000},
@ -220,6 +223,39 @@ enum {
signed int result[N];
/*
 * Scatter N wide accumulator values into the three 16-bit accumulator slices.
 *
 * acc: array of N values (only read).  For element k:
 *        bits 47..32 go to ACC_H(k),
 *        bits 31..16 go to ACC_M(k),
 *        bits 15..0  go to ACC_L(k).
 *      Anything above bit 47 is dropped by the (short) narrowing.
 *
 * One slice is written per loop so each loop touches a single destination.
 */
INLINE void do_store(INT64* acc)
{
    register int lane;

    /* high slice */
    for (lane = 0; lane < N; ++lane)
        ACC_H(lane) = (short)(acc[lane] >> 32);

    /* middle slice */
    for (lane = 0; lane < N; ++lane)
        ACC_M(lane) = (short)(acc[lane] >> 16);

    /* low slice: no shift needed, the cast keeps the bottom 16 bits */
    for (lane = 0; lane < N; ++lane)
        ACC_L(lane) = (short)(acc[lane]);
}
/*
 * Add N wide addends to the current accumulator and write the sums back.
 *
 * For each element i, the high, middle and low 16-bit accumulator slices
 * (ACC_H, ACC_M, ACC_L) are joined into one signed 64-bit value, acc[i] is
 * added to it, and the result is split back into the slices by do_store().
 *
 * acc: array of N addends, one per element; only read.
 *
 * Each step is kept in its own fixed-count loop over the N elements.
 * Every access to the scratch array must use the loop index i: base[N] is
 * one past the end of the array and must never be read or written.
 *
 * NOTE(review): `base[i] << 16` on a negative value is not defined by
 * ISO C; this relies on the usual two's-complement compiler behaviour.
 */
INLINE void do_acc(INT64* acc)
{
    INT64 base[N];
    register int i;

    /* start from the high slice; short -> INT64 sign-extends it */
    for (i = 0; i < N; i++)
        base[i] = ACC_H(i);
    for (i = 0; i < N; i++)
        base[i] = base[i] << 16;
    /* unsigned cast keeps the middle slice from sign-extending upward */
    for (i = 0; i < N; i++)
        base[i] = base[i] | (unsigned short)ACC_M(i);
    for (i = 0; i < N; i++)
        base[i] = base[i] << 16;
    /* same for the low slice */
    for (i = 0; i < N; i++)
        base[i] = base[i] | (unsigned short)ACC_L(i);
    /* accumulate the caller's addend */
    for (i = 0; i < N; i++)
        base[i] = base[i] + acc[i];
    do_store(base);
    return;
}
void SIGNED_CLAMP(short* VD, int mode)
{
register int i;
@ -227,9 +263,12 @@ void SIGNED_CLAMP(short* VD, int mode)
switch (mode)
{
case SM_MUL_X: /* typical sign-clamp of accumulator-mid (bits 31:16) */
for (i = 0; i < N; i++)
result[i] = ACC_H(i) << 16;
for (i = 0; i < N; i++)
result[i] = result[i] | (unsigned short)ACC_M(i);
for (i = 0; i < N; i++)
{
result[i] = *(signed int *)((unsigned char *)(VACC + i) + 2);
#ifdef FORCE_STATIC_CLAMP
VD[i] = result[i] & 0x0000FFFF;
VD[i] &= ~(result[i] - -32768) >> 31; /* min: 0x8000 ^ 0x8000 */
@ -244,11 +283,14 @@ void SIGNED_CLAMP(short* VD, int mode)
}
return;
case SM_MUL_Z: /* sign-clamp accumulator-low (bits 15:0) */
for (i = 0; i < N; i++)
result[i] = ACC_H(i) << 16;
for (i = 0; i < N; i++)
result[i] = result[i] | (unsigned short)ACC_M(i);
for (i = 0; i < N; i++)
{
result[i] = *(signed int *)((unsigned char *)(VACC + i) + 2);
#ifdef FORCE_STATIC_CLAMP
VD[i] = VACC[i].DW & 0x00000000FFFF;
VD[i] = ACC_L(i);
VD[i] &= ~(result[i] - -32768) >> 31;
VD[i] |= (+32767 - result[i]) >> 31;
continue;
@ -262,15 +304,18 @@ void SIGNED_CLAMP(short* VD, int mode)
return;
case SM_MUL_Q: /* possible DCT inverse quantization (VMACQ only) */
for (i = 0; i < N; i++)
{
result[i] = CLAMP_BASE(i, 17);
result[i] = (short)(ACC_H(i) << 31);
for (i = 0; i < N; i++)
result[i] = result[i] | (ACC_M(i) << 15);
for (i = 0; i < N; i++)
result[i] = result[i] | ((unsigned short)ACC_L(i) >> 1);
for (i = 0; i < N; i++)
if (result[i] < -32768)
VD[i] = -32768 & ~0x000F;
else if (result[i] > +32767)
VD[i] = +32767 & ~0x000F;
else
VD[i] = result[i] & 0x0000FFF0;
}
return;
case SM_ADD_A: /* VADD and VSUB */
for (i = 0; i < N; i++)

View File

@ -1,16 +1,5 @@
#include "vu.h"
/*
 * Element-wise XOR over the N vector elements.
 *
 * For each element i, VR[vs][i] is XORed with VR_T(i) and stored through
 * ACC_R(i); a second pass copies ACC_R(i) to ACC_W(i).
 *
 * NOTE(review): vd, vt and e are not referenced directly in this body;
 * presumably the VR_T / ACC_W macros pick them up by name -- confirm
 * against the macro definitions before renaming any parameter.
 */
static void VXOR(int vd, int vs, int vt, int e)
{
register int i;
/* pass 1: compute the XOR into the ACC_R staging location */
for (i = 0; i < N; i++)
ACC_R(i) = VR[vs][i] ^ VR_T(i);
/* pass 2: copy the staged result to ACC_W */
for (i = 0; i < N; i++)
ACC_W(i) = ACC_R(i);
return;
}
static void VXOR_v(void)
{
register int i;
@ -19,9 +8,9 @@ static void VXOR_v(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][i];
ACC_L(i) = VR[vs][i] ^ VR[vt][i];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR0q(void)
@ -32,9 +21,9 @@ static void VXOR0q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x2 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR1q(void)
@ -45,9 +34,9 @@ static void VXOR1q(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x3 & 01) + (i & 0xE)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR0h(void)
@ -58,9 +47,9 @@ static void VXOR0h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x4 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR1h(void)
@ -71,9 +60,9 @@ static void VXOR1h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x5 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR2h(void)
@ -84,9 +73,9 @@ static void VXOR2h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x6 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR3h(void)
@ -97,9 +86,9 @@ static void VXOR3h(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x7 & 03) + (i & 0xC)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR0w(void)
@ -110,9 +99,9 @@ static void VXOR0w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x8 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR1w(void)
@ -123,9 +112,9 @@ static void VXOR1w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0x9 & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR2w(void)
@ -136,9 +125,9 @@ static void VXOR2w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xA & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR3w(void)
@ -149,9 +138,9 @@ static void VXOR3w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xB & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR4w(void)
@ -162,9 +151,9 @@ static void VXOR4w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xC & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR5w(void)
@ -175,9 +164,9 @@ static void VXOR5w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xD & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR6w(void)
@ -188,9 +177,9 @@ static void VXOR6w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xE & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}
static void VXOR7w(void)
@ -201,8 +190,8 @@ static void VXOR7w(void)
const int vt = inst.R.rt;
for (i = 0; i < N; i++)
VACC[i].s[LO] = VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)];
ACC_L(i) = VR[vs][i] ^ VR[vt][(0xF & 07) + (i & 0x0)];
for (i = 0; i < N; i++)
VR[vd][i] = VACC[i].s[LO];
VR[vd][i] = ACC_L(i);
return;
}