mirror of
https://github.com/hrydgard/ppsspp.git
synced 2024-11-24 05:49:58 +00:00
vx2i, vbfy, vsgn
This commit is contained in:
parent
7046f960e5
commit
905af75925
@ -59,6 +59,10 @@ namespace MIPSComp {
|
||||
}
|
||||
}
|
||||
|
||||
static bool IsConsecutive2(const u8 regs[2]) {
|
||||
return regs[1] == regs[0] + 1;
|
||||
}
|
||||
|
||||
static bool IsConsecutive4(const u8 regs[4]) {
|
||||
return regs[1] == regs[0] + 1 &&
|
||||
regs[2] == regs[1] + 1 &&
|
||||
@ -303,6 +307,12 @@ namespace MIPSComp {
|
||||
}
|
||||
break;
|
||||
|
||||
case 53: // lvl/lvr.q - highly unusual
|
||||
case 61: // svl/svr.q - highly unusual
|
||||
logBlocks = 1;
|
||||
Comp_Generic(op);
|
||||
break;
|
||||
|
||||
default:
|
||||
DISABLE;
|
||||
break;
|
||||
@ -1348,7 +1358,101 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
void IRFrontend::Comp_Vx2i(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
CONDITIONAL_DISABLE;
|
||||
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
int bits = ((op >> 16) & 2) == 0 ? 8 : 16; // vuc2i/vc2i (0/1), vus2i/vs2i (2/3)
|
||||
bool unsignedOp = ((op >> 16) & 1) == 0; // vuc2i (0), vus2i (2)
|
||||
|
||||
// vs2i or vus2i unpack pairs of 16-bit integers into 32-bit integers, with the values
|
||||
// at the top. vus2i shifts it an extra bit right afterward.
|
||||
// vc2i and vuc2i unpack quads of 8-bit integers into 32-bit integers, with the values
|
||||
// at the top too. vuc2i is a bit special (see below.)
|
||||
// Let's do this similarly as h2f - we do a solution that works for both singles and pairs
|
||||
// then use it for both.
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
VectorSize outsize;
|
||||
if (bits == 8) {
|
||||
outsize = V_Quad;
|
||||
} else {
|
||||
switch (sz) {
|
||||
case V_Single:
|
||||
outsize = V_Pair;
|
||||
break;
|
||||
case V_Pair:
|
||||
outsize = V_Quad;
|
||||
break;
|
||||
default:
|
||||
DISABLE;
|
||||
}
|
||||
}
|
||||
|
||||
u8 sregs[2], dregs[4], tempregs[4], srcregs[2];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, outsize, _VD);
|
||||
memcpy(tempregs, dregs, sizeof(dregs));
|
||||
memcpy(srcregs, sregs, sizeof(sregs));
|
||||
|
||||
// Remap source regs to be consecutive. This is not required
|
||||
// but helpful when implementations can join two Vec2Expand.
|
||||
if (sz == V_Pair && !IsConsecutive2(srcregs)) {
|
||||
for (int i = 0; i < 2; i++) {
|
||||
srcregs[i] = IRVTEMP_0 + i;
|
||||
ir.Write(IROp::FMov, srcregs[i], sregs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
int nIn = GetNumVectorElements(sz);
|
||||
|
||||
int nOut = 2;
|
||||
if (outsize == V_Quad)
|
||||
nOut = 4;
|
||||
// Remap dest regs. PFX_T is unused.
|
||||
if (outsize == V_Pair) {
|
||||
bool consecutive = IsConsecutive2(dregs);
|
||||
for (int i = 0; i < 2; i++) {
|
||||
if (!consecutive || !IsOverlapSafe(dregs[i], nIn, srcregs)) {
|
||||
tempregs[i] = IRVTEMP_PFX_T + i;
|
||||
}
|
||||
}
|
||||
} else if (outsize == V_Quad) {
|
||||
bool consecutive = IsConsecutive4(dregs);
|
||||
for (int i = 0; i < 4; i++) {
|
||||
if (!consecutive || !IsOverlapSafe(dregs[i], nIn, srcregs)) {
|
||||
tempregs[i] = IRVTEMP_PFX_T + i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (bits == 16) {
|
||||
if (unsignedOp) {
|
||||
ir.Write(IROp::Vec2Unpack16To31, tempregs[0], srcregs[0]);
|
||||
if (outsize == V_Quad)
|
||||
ir.Write(IROp::Vec2Unpack16To31, tempregs[2], srcregs[1]);
|
||||
} else {
|
||||
ir.Write(IROp::Vec2Unpack16To32, tempregs[0], srcregs[0]);
|
||||
if (outsize == V_Quad)
|
||||
ir.Write(IROp::Vec2Unpack16To32, tempregs[2], srcregs[1]);
|
||||
}
|
||||
} else if (bits == 8) {
|
||||
if (unsignedOp) {
|
||||
// See the interpreter, this one is odd. Hardware bug?
|
||||
ir.Write(IROp::Vec4Unpack8To32, tempregs[0], srcregs[0]);
|
||||
ir.Write(IROp::Vec4DuplicateUpperBitsAndShift1, tempregs[0], tempregs[0]);
|
||||
} else {
|
||||
ir.Write(IROp::Vec4Unpack8To32, tempregs[0], srcregs[0]);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < nOut; i++) {
|
||||
if (tempregs[i] != dregs[i]) {
|
||||
ir.Write(IROp::FMov, dregs[i], tempregs[i]);
|
||||
}
|
||||
}
|
||||
ApplyPrefixD(dregs, outsize);
|
||||
}
|
||||
|
||||
void IRFrontend::Comp_VCrossQuat(MIPSOpcode op) {
|
||||
@ -1537,8 +1641,6 @@ namespace MIPSComp {
|
||||
int n = GetNumVectorElements(sz);
|
||||
bool negSin = (imm & 0x10) ? true : false;
|
||||
|
||||
logBlocks = 1;
|
||||
|
||||
char d[4] = { '0', '0', '0', '0' };
|
||||
if (((imm >> 2) & 3) == (imm & 3)) {
|
||||
for (int i = 0; i < 4; i++)
|
||||
@ -1578,7 +1680,33 @@ namespace MIPSComp {
|
||||
// Vector extract sign
|
||||
// d[N] = signum(s[N])
|
||||
|
||||
DISABLE;
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
u8 tempregs[4];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!IsOverlapSafe(dregs[i], n, sregs)) {
|
||||
tempregs[i] = IRTEMP_0 + i;
|
||||
} else {
|
||||
tempregs[i] = dregs[i];
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
ir.Write(IROp::FSign, tempregs[i], sregs[i]);
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (dregs[i] != tempregs[i]) {
|
||||
ir.Write(IROp::FMov, dregs[i], tempregs[i]);
|
||||
}
|
||||
}
|
||||
|
||||
ApplyPrefixD(dregs, sz);
|
||||
}
|
||||
|
||||
void IRFrontend::Comp_Vocp(MIPSOpcode op) {
|
||||
@ -1629,6 +1757,54 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
void IRFrontend::Comp_Vbfy(MIPSOpcode op) {
|
||||
DISABLE;
|
||||
CONDITIONAL_DISABLE;
|
||||
if (js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
VectorSize sz = GetVecSize(op);
|
||||
int n = GetNumVectorElements(sz);
|
||||
if (n != 2 && n != 4) {
|
||||
// Bad instructions
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
u8 sregs[4], dregs[4];
|
||||
GetVectorRegsPrefixS(sregs, sz, _VS);
|
||||
GetVectorRegsPrefixD(dregs, sz, _VD);
|
||||
|
||||
u8 tempregs[4];
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (!IsOverlapSafe(dregs[i], n, sregs)) {
|
||||
tempregs[i] = IRVTEMP_0;
|
||||
} else {
|
||||
tempregs[i] = dregs[i];
|
||||
}
|
||||
}
|
||||
|
||||
int subop = (op >> 16) & 0x1F;
|
||||
if (subop == 3) {
|
||||
// vbfy2
|
||||
ir.Write(IROp::FAdd, tempregs[0], sregs[0], sregs[2]);
|
||||
ir.Write(IROp::FAdd, tempregs[1], sregs[1], sregs[3]);
|
||||
ir.Write(IROp::FSub, tempregs[2], sregs[0], sregs[2]);
|
||||
ir.Write(IROp::FSub, tempregs[3], sregs[1], sregs[3]);
|
||||
} else if (subop == 2) {
|
||||
// vbfy1
|
||||
ir.Write(IROp::FAdd, tempregs[0], sregs[0], sregs[1]);
|
||||
ir.Write(IROp::FSub, tempregs[1], sregs[0], sregs[1]);
|
||||
if (n == 4) {
|
||||
ir.Write(IROp::FAdd, tempregs[2], sregs[2], sregs[3]);
|
||||
ir.Write(IROp::FSub, tempregs[3], sregs[2], sregs[3]);
|
||||
}
|
||||
} else {
|
||||
DISABLE;
|
||||
}
|
||||
|
||||
for (int i = 0; i < n; ++i) {
|
||||
if (tempregs[i] != dregs[i])
|
||||
dregs[i] = tempregs[i];
|
||||
}
|
||||
|
||||
ApplyPrefixD(dregs, sz);
|
||||
}
|
||||
}
|
||||
|
@ -81,6 +81,7 @@ static const IRMeta irMeta[] = {
|
||||
{ IROp::FRecip, "FRecip", "FF" },
|
||||
{ IROp::FAsin, "FAsin", "FF" },
|
||||
{ IROp::FNeg, "FNeg", "FF" },
|
||||
{ IROp::FSign, "FSign", "FF" },
|
||||
{ IROp::FAbs, "FAbs", "FF" },
|
||||
{ IROp::FRound, "FRound", "FF" },
|
||||
{ IROp::FTrunc, "FTrunc", "FF" },
|
||||
@ -114,6 +115,12 @@ static const IRMeta irMeta[] = {
|
||||
{ IROp::Vec4Neg, "Vec4Neg", "FF" },
|
||||
{ IROp::Vec4Abs, "Vec4Abs", "FF" },
|
||||
|
||||
// Pack/Unpack
|
||||
{ IROp::Vec2Unpack16To31, "Vec2Unpack16To31", "FF" }, // Note that the result is shifted down by 1, hence 31
|
||||
{ IROp::Vec2Unpack16To32, "Vec2Unpack16To32", "FF" },
|
||||
{ IROp::Vec4Unpack8To32, "Vec4Unpack8To32", "FF" },
|
||||
{ IROp::Vec4DuplicateUpperBitsAndShift1, "Vec4DuplicateUpperBitsAndShift1", "FF" },
|
||||
|
||||
{ IROp::Interpret, "Interpret", "_C" },
|
||||
{ IROp::Downcount, "Downcount", "_II" },
|
||||
{ IROp::ExitToPC, "ExitToPC", "", IRFLAG_EXIT },
|
||||
|
@ -124,6 +124,7 @@ enum class IROp : u8 {
|
||||
FSqrt,
|
||||
FNeg,
|
||||
FAbs,
|
||||
FSign,
|
||||
|
||||
FRound,
|
||||
FTrunc,
|
||||
@ -174,10 +175,10 @@ enum class IROp : u8 {
|
||||
Vec4Abs,
|
||||
|
||||
// vx2i
|
||||
Vec4ExpandU16ToU32Hi,
|
||||
Vec4ExpandU8ToU32Hi,
|
||||
Vec4ExpandS16ToS32Hi,
|
||||
Vec4ExpandS8ToS32Hi,
|
||||
Vec2Unpack16To31, // Note that the result is shifted down by 1, hence 31
|
||||
Vec2Unpack16To32,
|
||||
Vec4Unpack8To32,
|
||||
Vec4DuplicateUpperBitsAndShift1, // Bizarro vuc2i behaviour, in an instruction. Split?
|
||||
|
||||
// Slow special functions. Used on singles.
|
||||
FSin,
|
||||
|
@ -226,6 +226,33 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
||||
mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
|
||||
break;
|
||||
|
||||
case IROp::Vec2Unpack16To31:
|
||||
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16) >> 1;
|
||||
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000) >> 1;
|
||||
break;
|
||||
|
||||
case IROp::Vec2Unpack16To32:
|
||||
mips->fi[inst->dest] = (mips->fi[inst->src1] << 16);
|
||||
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] & 0xFFFF0000);
|
||||
break;
|
||||
|
||||
case IROp::Vec4Unpack8To32:
|
||||
mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
|
||||
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
|
||||
mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
|
||||
mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
|
||||
break;
|
||||
|
||||
case IROp::Vec4DuplicateUpperBitsAndShift1:
|
||||
for (int i = 0; i < 4; i++) {
|
||||
u32 val = mips->fi[inst->src1 + i];
|
||||
val = val | (val >> 8);
|
||||
val = val | (val >> 16);
|
||||
val >>= 1;
|
||||
mips->fi[inst->dest + i] = val;
|
||||
}
|
||||
break;
|
||||
|
||||
case IROp::FCmpVfpuBit:
|
||||
{
|
||||
int op = inst->dest & 0xF;
|
||||
@ -519,6 +546,19 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
||||
mips->f[inst->dest] = clamp_value(mips->f[inst->src1], -1.0f, 1.0f);
|
||||
break;
|
||||
|
||||
// Bitwise trickery
|
||||
case IROp::FSign:
|
||||
{
|
||||
u32 val;
|
||||
memcpy(&val, &mips->f[inst->src1], sizeof(u32));
|
||||
if (val == 0 || val == 0x80000000)
|
||||
mips->f[inst->dest] = 0.0f;
|
||||
else if ((val >> 31) == 0)
|
||||
mips->f[inst->dest] = 1.0f;
|
||||
else
|
||||
mips->f[inst->dest] = -1.0f;
|
||||
}
|
||||
|
||||
case IROp::FpCondToReg:
|
||||
mips->r[inst->dest] = mips->fpcond;
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user