vx2i, vbfy, vsgn

This commit is contained in:
Henrik Rydgard 2016-05-15 10:34:30 +02:00
parent 7046f960e5
commit 905af75925
4 changed files with 233 additions and 9 deletions

View File

@ -59,6 +59,10 @@ namespace MIPSComp {
}
}
// Returns true when the second register index directly follows the first,
// i.e. the pair occupies two adjacent register slots.
static bool IsConsecutive2(const u8 regs[2]) {
	const int first = regs[0];
	const int second = regs[1];
	return second - first == 1;
}
static bool IsConsecutive4(const u8 regs[4]) {
return regs[1] == regs[0] + 1 &&
regs[2] == regs[1] + 1 &&
@ -303,6 +307,12 @@ namespace MIPSComp {
}
break;
case 53: // lvl/lvr.q - highly unusual
case 61: // svl/svr.q - highly unusual
logBlocks = 1;
Comp_Generic(op);
break;
default:
DISABLE;
break;
@ -1348,7 +1358,101 @@ namespace MIPSComp {
}
// Compiles the vx2i family (vuc2i / vc2i / vus2i / vs2i) to IR.
//
// The low two bits of (op >> 16) select the variant: bit 1 chooses 8-bit vs
// 16-bit source elements, bit 0 chooses signed vs unsigned handling.
// Unsupported encodings bail out through DISABLE.
void IRFrontend::Comp_Vx2i(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix())
		DISABLE;

	int bits = ((op >> 16) & 2) == 0 ? 8 : 16; // vuc2i/vc2i (0/1), vus2i/vs2i (2/3)
	bool unsignedOp = ((op >> 16) & 1) == 0; // vuc2i (0), vus2i (2)

	// vs2i or vus2i unpack pairs of 16-bit integers into 32-bit integers, with the values
	// at the top. vus2i shifts it an extra bit right afterward.
	// vc2i and vuc2i unpack quads of 8-bit integers into 32-bit integers, with the values
	// at the top too. vuc2i is a bit special (see below.)
	// Let's do this similarly as h2f - we do a solution that works for both singles and pairs
	// then use it for both.

	VectorSize sz = GetVecSize(op);
	const int nIn = GetNumVectorElements(sz);
	VectorSize outsize;
	if (bits == 8) {
		// Only the first source register is consumed on this path, and the
		// source arrays below hold just two entries. Refuse wider encodings
		// rather than letting the register lookup write past the arrays.
		if (nIn > 2)
			DISABLE;
		outsize = V_Quad;
	} else {
		switch (sz) {
		case V_Single:
			outsize = V_Pair;
			break;
		case V_Pair:
			outsize = V_Quad;
			break;
		default:
			DISABLE;
		}
	}

	u8 sregs[2], dregs[4], tempregs[4], srcregs[2];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, outsize, _VD);
	memcpy(tempregs, dregs, sizeof(dregs));
	memcpy(srcregs, sregs, sizeof(sregs));

	// Remap source regs to be consecutive. This is not required
	// but helpful when implementations can join two Vec2Expand.
	if (sz == V_Pair && !IsConsecutive2(srcregs)) {
		for (int i = 0; i < 2; i++) {
			srcregs[i] = IRVTEMP_0 + i;
			ir.Write(IROp::FMov, srcregs[i], sregs[i]);
		}
	}

	const int nOut = (outsize == V_Quad) ? 4 : 2;

	// Remap dest regs. PFX_T is unused.
	// The unpack ops are handed only tempregs[0] (and tempregs[2]) and write
	// the following registers implicitly, so the temporaries have to form one
	// consecutive run. That makes the remap all-or-nothing: if the real
	// destination is not consecutive, or any lane aliases a source, every
	// lane goes through the temp block.
	bool useTemps = (outsize == V_Quad) ? !IsConsecutive4(dregs) : !IsConsecutive2(dregs);
	for (int i = 0; i < nOut && !useTemps; i++) {
		if (!IsOverlapSafe(dregs[i], nIn, srcregs))
			useTemps = true;
	}
	if (useTemps) {
		for (int i = 0; i < nOut; i++) {
			tempregs[i] = IRVTEMP_PFX_T + i;
		}
	}

	if (bits == 16) {
		// Each Vec2 op expands one source register into two outputs.
		const IROp unpackOp = unsignedOp ? IROp::Vec2Unpack16To31 : IROp::Vec2Unpack16To32;
		ir.Write(unpackOp, tempregs[0], srcregs[0]);
		if (outsize == V_Quad)
			ir.Write(unpackOp, tempregs[2], srcregs[1]);
	} else if (bits == 8) {
		ir.Write(IROp::Vec4Unpack8To32, tempregs[0], srcregs[0]);
		if (unsignedOp) {
			// See the interpreter, this one is odd. Hardware bug?
			ir.Write(IROp::Vec4DuplicateUpperBitsAndShift1, tempregs[0], tempregs[0]);
		}
	}

	// Copy back any lanes that were computed in temporaries.
	for (int i = 0; i < nOut; i++) {
		if (tempregs[i] != dregs[i]) {
			ir.Write(IROp::FMov, dregs[i], tempregs[i]);
		}
	}
	ApplyPrefixD(dregs, outsize);
}
void IRFrontend::Comp_VCrossQuat(MIPSOpcode op) {
@ -1537,8 +1641,6 @@ namespace MIPSComp {
int n = GetNumVectorElements(sz);
bool negSin = (imm & 0x10) ? true : false;
logBlocks = 1;
char d[4] = { '0', '0', '0', '0' };
if (((imm >> 2) & 3) == (imm & 3)) {
for (int i = 0; i < 4; i++)
@ -1578,7 +1680,33 @@ namespace MIPSComp {
// Vector extract sign
// d[N] = signum(s[N])
DISABLE;
VectorSize sz = GetVecSize(op);
int n = GetNumVectorElements(sz);
u8 sregs[4], dregs[4];
GetVectorRegsPrefixS(sregs, sz, _VS);
GetVectorRegsPrefixD(dregs, sz, _VD);
u8 tempregs[4];
for (int i = 0; i < n; ++i) {
if (!IsOverlapSafe(dregs[i], n, sregs)) {
tempregs[i] = IRTEMP_0 + i;
} else {
tempregs[i] = dregs[i];
}
}
for (int i = 0; i < n; ++i) {
ir.Write(IROp::FSign, tempregs[i], sregs[i]);
}
for (int i = 0; i < n; ++i) {
if (dregs[i] != tempregs[i]) {
ir.Write(IROp::FMov, dregs[i], tempregs[i]);
}
}
ApplyPrefixD(dregs, sz);
}
void IRFrontend::Comp_Vocp(MIPSOpcode op) {
@ -1629,6 +1757,54 @@ namespace MIPSComp {
}
// Compiles vbfy1 / vbfy2 (butterfly add/sub) to scalar FAdd/FSub IR ops.
//
// vbfy1 (subop 2) works on adjacent lanes: d0 = s0 + s1, d1 = s0 - s1, and
// likewise for lanes 2/3 when the vector has four elements.
// vbfy2 (subop 3) pairs lanes 0/2 and 1/3 and so needs four elements.
// Anything else bails out through DISABLE.
void IRFrontend::Comp_Vbfy(MIPSOpcode op) {
	CONDITIONAL_DISABLE;
	if (js.HasUnknownPrefix())
		DISABLE;

	VectorSize sz = GetVecSize(op);
	int n = GetNumVectorElements(sz);
	if (n != 2 && n != 4) {
		// Bad instructions
		DISABLE;
	}

	int subop = (op >> 16) & 0x1F;
	const bool isVbfy2 = subop == 3;
	const bool isVbfy1 = subop == 2;
	// vbfy2 touches lanes 2 and 3; with a pair those register slots are never
	// filled in, so reject that encoding before emitting anything.
	if (!isVbfy1 && !(isVbfy2 && n == 4)) {
		DISABLE;
	}

	u8 sregs[4], dregs[4];
	GetVectorRegsPrefixS(sregs, sz, _VS);
	GetVectorRegsPrefixD(dregs, sz, _VD);

	// Every output lane reads more than one source lane, so a destination
	// that aliases any source is computed into its own temporary first.
	// Each lane needs a distinct temp or the results overwrite each other.
	u8 tempregs[4];
	for (int i = 0; i < n; ++i) {
		if (!IsOverlapSafe(dregs[i], n, sregs)) {
			tempregs[i] = IRVTEMP_0 + i;
		} else {
			tempregs[i] = dregs[i];
		}
	}

	if (isVbfy2) {
		// vbfy2
		ir.Write(IROp::FAdd, tempregs[0], sregs[0], sregs[2]);
		ir.Write(IROp::FAdd, tempregs[1], sregs[1], sregs[3]);
		ir.Write(IROp::FSub, tempregs[2], sregs[0], sregs[2]);
		ir.Write(IROp::FSub, tempregs[3], sregs[1], sregs[3]);
	} else {
		// vbfy1
		ir.Write(IROp::FAdd, tempregs[0], sregs[0], sregs[1]);
		ir.Write(IROp::FSub, tempregs[1], sregs[0], sregs[1]);
		if (n == 4) {
			ir.Write(IROp::FAdd, tempregs[2], sregs[2], sregs[3]);
			ir.Write(IROp::FSub, tempregs[3], sregs[2], sregs[3]);
		}
	}

	// Move results held in temporaries into the real destination registers.
	for (int i = 0; i < n; ++i) {
		if (tempregs[i] != dregs[i])
			ir.Write(IROp::FMov, dregs[i], tempregs[i]);
	}

	ApplyPrefixD(dregs, sz);
}
}

View File

@ -81,6 +81,7 @@ static const IRMeta irMeta[] = {
{ IROp::FRecip, "FRecip", "FF" },
{ IROp::FAsin, "FAsin", "FF" },
{ IROp::FNeg, "FNeg", "FF" },
{ IROp::FSign, "FSign", "FF" },
{ IROp::FAbs, "FAbs", "FF" },
{ IROp::FRound, "FRound", "FF" },
{ IROp::FTrunc, "FTrunc", "FF" },
@ -114,6 +115,12 @@ static const IRMeta irMeta[] = {
{ IROp::Vec4Neg, "Vec4Neg", "FF" },
{ IROp::Vec4Abs, "Vec4Abs", "FF" },
// Pack/Unpack
{ IROp::Vec2Unpack16To31, "Vec2Unpack16To31", "FF" }, // Note that the result is shifted down by 1, hence 31
{ IROp::Vec2Unpack16To32, "Vec2Unpack16To32", "FF" },
{ IROp::Vec4Unpack8To32, "Vec4Unpack8To32", "FF" },
{ IROp::Vec4DuplicateUpperBitsAndShift1, "Vec4DuplicateUpperBitsAndShift1", "FF" },
{ IROp::Interpret, "Interpret", "_C" },
{ IROp::Downcount, "Downcount", "_II" },
{ IROp::ExitToPC, "ExitToPC", "", IRFLAG_EXIT },

View File

@ -124,6 +124,7 @@ enum class IROp : u8 {
FSqrt,
FNeg,
FAbs,
FSign,
FRound,
FTrunc,
@ -174,10 +175,10 @@ enum class IROp : u8 {
Vec4Abs,
// vx2i
Vec4ExpandU16ToU32Hi,
Vec4ExpandU8ToU32Hi,
Vec4ExpandS16ToS32Hi,
Vec4ExpandS8ToS32Hi,
Vec2Unpack16To31, // Note that the result is shifted down by 1, hence 31
Vec2Unpack16To32,
Vec4Unpack8To32,
Vec4DuplicateUpperBitsAndShift1, // Bizarro vuc2i behaviour, in an instruction. Split?
// Slow special functions. Used on singles.
FSin,

View File

@ -226,6 +226,33 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
break;
case IROp::Vec2Unpack16To31:
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1;
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1;
break;
case IROp::Vec2Unpack16To32:
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16);
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000);
break;
case IROp::Vec4Unpack8To32:
mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
break;
case IROp::Vec4DuplicateUpperBitsAndShift1:
for (int i = 0; i < 4; i++) {
u32 val = mips->fi[inst->src1 + i];
val = val | (val >> 8);
val = val | (val >> 16);
val >>= 1;
mips->fi[inst->dest + i] = val;
}
break;
case IROp::FCmpVfpuBit:
{
int op = inst->dest & 0xF;
@ -519,6 +546,19 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
mips->f[inst->dest] = clamp_value(mips->f[inst->src1], -1.0f, 1.0f);
break;
// Bitwise trickery
case IROp::FSign:
{
	// Classify the float by its raw bit pattern: +0 and -0 give 0.0f,
	// otherwise the sign bit alone selects +1.0f or -1.0f.
	u32 val;
	memcpy(&val, &mips->f[inst->src1], sizeof(u32));
	if (val == 0 || val == 0x80000000)
		mips->f[inst->dest] = 0.0f;
	else if ((val >> 31) == 0)
		mips->f[inst->dest] = 1.0f;
	else
		mips->f[inst->dest] = -1.0f;
	// Must not fall through: the next case writes an integer register
	// indexed by inst->dest.
	break;
}
case IROp::FpCondToReg:
mips->r[inst->dest] = mips->fpcond;
break;