interp: Handle prefixes for Vmmov/Vmmul/Vmscl.

I doubt any actual code uses this, but we have some tricky VFPU bugs left,
so I'm just aiming for maximum accuracy in the interpreter.
This commit is contained in:
Unknown W. Brackets 2019-03-10 16:39:08 -07:00
parent 26b1368f7b
commit d40ac043d4
2 changed files with 56 additions and 30 deletions

View File

@ -1043,8 +1043,11 @@ namespace MIPSComp {
void IRFrontend::Comp_Vmmov(MIPSOpcode op) {
CONDITIONAL_DISABLE(VFPU_MTX);
if (!js.HasNoPrefix()) {
DISABLE;
}
// Matrix move (no prefixes)
// Matrix move (weird prefixes)
// D[N,M] = S[N,M]
int vs = _VS;
@ -1100,9 +1103,13 @@ namespace MIPSComp {
void IRFrontend::Comp_Vmscl(MIPSOpcode op) {
CONDITIONAL_DISABLE(VFPU_MTX);
if (!js.HasNoPrefix()) {
DISABLE;
}
// Matrix scale, matrix by scalar (no prefixes)
// Matrix scale, matrix by scalar (weird prefixes)
// d[N,M] = s[N,M] * t[0]
// Note: behaves just slightly differently than a series of vscls.
int vs = _VS;
int vd = _VD;
@ -1216,7 +1223,7 @@ namespace MIPSComp {
DISABLE;
}
// Matrix multiply (wierd prefixes)
// Matrix multiply (weird prefixes)
// D[0 .. N, 0 .. M] = S[0 .. N, 0 .. M]' * T[0 .. N, 0 .. M]
// Note: Behaves as if it's implemented through a series of vdots.
// Important: this is a matrix multiply with a pre-transposed S.

View File

@ -419,11 +419,8 @@ namespace MIPSInt
}
// The test really needs some work.
void Int_Vmmul(MIPSOpcode op)
{
float s[16];
float t[16];
float d[16];
void Int_Vmmul(MIPSOpcode op) {
float s[16]{}, t[16]{}, d[16];
int vd = _VD;
int vs = _VS;
@ -434,29 +431,37 @@ namespace MIPSInt
ReadMatrix(s, sz, vs);
ReadMatrix(t, sz, vt);
for (int a = 0; a < n; a++)
{
for (int b = 0; b < n; b++)
{
for (int a = 0; a < n; a++) {
for (int b = 0; b < n; b++) {
float sum = 0.0f;
for (int c = 0; c < n; c++)
{
sum += s[b*4 + c] * t[a*4 + c];
if (a == n - 1 && b == n - 1) {
// S and T prefixes work on the final (or maybe first, in reverse?) dot.
ApplySwizzleS(&s[b * 4], V_Quad);
ApplySwizzleT(&t[a * 4], V_Quad);
for (int c = 0; c < 4; c++) {
sum += s[b * 4 + c] * t[a * 4 + c];
}
} else {
for (int c = 0; c < n; c++) {
sum += s[b * 4 + c] * t[a * 4 + c];
}
}
d[a*4 + b] = sum;
d[a * 4 + b] = sum;
}
}
// The D prefix applies ONLY to the final element, but sat does work.
u32 lastmask = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & (1 << 8)) << (n - 1);
u32 lastsat = (currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] & 3) << (n + n - 2);
currentMIPS->vfpuCtrl[VFPU_CTRL_DPREFIX] = lastmask | lastsat;
ApplyPrefixD(&d[4 * (n - 1)], V_Quad, false);
WriteMatrix(d, sz, vd);
PC += 4;
EatPrefixes();
}
void Int_Vmscl(MIPSOpcode op)
{
float d[16];
float s[16];
float t[1];
void Int_Vmscl(MIPSOpcode op) {
float s[16]{}, t[4]{}, d[16];
int vd = _VD;
int vs = _VS;
@ -467,27 +472,41 @@ namespace MIPSInt
ReadMatrix(s, sz, vs);
ReadVector(t, V_Single, vt);
for (int a = 0; a < n; a++)
{
for (int b = 0; b < n; b++)
{
d[a*4 + b] = s[a*4 + b] * t[0];
for (int a = 0; a < n - 1; a++) {
for (int b = 0; b < n; b++) {
d[a * 4 + b] = s[a * 4 + b] * t[0];
}
}
// S prefix applies to the last row.
ApplySwizzleS(&s[(n - 1) * 4], V_Quad);
// T prefix applies only for the last row, and is used per element.
// This is like vscl, but instead of zzzz it uses xxxx.
u32 tprefixRemove = VFPU_ANY_SWIZZLE();
u32 tprefixAdd = VFPU_SWIZZLE(0, 0, 0, 0);
ApplyPrefixST(t, VFPURewritePrefix(VFPU_CTRL_TPREFIX, tprefixRemove, tprefixAdd), V_Quad);
for (int b = 0; b < n; b++) {
d[(n - 1) * 4 + b] = s[(n - 1) * 4 + b] * t[b];
}
// The D prefix is applied to the last row.
ApplyPrefixD(&d[(n - 1) * 4], V_Quad);
WriteMatrix(d, sz, vd);
PC += 4;
EatPrefixes();
}
void Int_Vmmov(MIPSOpcode op)
{
float s[16];
void Int_Vmmov(MIPSOpcode op) {
float s[16]{};
int vd = _VD;
int vs = _VS;
MatrixSize sz = GetMtxSize(op);
ReadMatrix(s, sz, vs);
// This is just for matrices. No prefixes.
// S and D prefixes are applied to the last row.
int off = GetMatrixSide(sz) - 1;
ApplySwizzleS(&s[off * 4], V_Quad);
ApplyPrefixD(&s[off * 4], V_Quad);
WriteMatrix(s, sz, vd);
PC += 4;
EatPrefixes();