Prefix prep

This commit is contained in:
Henrik Rydgard 2016-05-11 00:16:07 +02:00
parent b3dd36982f
commit 219548b8e2
6 changed files with 96 additions and 58 deletions

View File

@ -88,7 +88,7 @@ namespace MIPSComp {
}
}
void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg) {
if (prefix == 0xE4)
return;
@ -109,13 +109,9 @@ namespace MIPSComp {
if (!constants && regnum == i && !abs && !negate)
continue;
/*
// This puts the value into a temp reg, so we won't write the modified value back.
vregs[i] = fpr.GetTempV();
vregs[i] = tempReg + i;
if (!constants) {
fpr.MapDirtyInV(vregs[i], origV[regnum]);
fpr.SpillLockV(vregs[i]);
// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
// TODO: But some ops seem to use const 0 instead?
if (regnum >= n) {
@ -124,36 +120,58 @@ namespace MIPSComp {
}
if (abs) {
fp.FABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
ir.Write(IROp::FAbs, vregs[i], origV[regnum]);
if (negate)
fp.FNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
ir.Write(IROp::FNeg, vregs[i], vregs[i]);
} else {
if (negate)
fp.FNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
ir.Write(IROp::FNeg, vregs[i], origV[regnum]);
else
fp.FMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
ir.Write(IROp::FMov, vregs[i], origV[regnum]);
}
} else {
fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
fpr.SpillLockV(vregs[i]);
fp.MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs << 2)], SCRATCH1, (bool)negate);
if (negate) {
ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(-constantArray[regnum + (abs << 2)]));
} else {
ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(constantArray[regnum + (abs << 2)]));
}
}
*/
}
}
// Fills regs with the register numbers that make up vector vectorReg of size N.
// The numbers come from the global ::GetVectorRegs helper and are then passed
// through ApplyVoffset before being returned to the caller.
// NOTE(review): ApplyVoffset is not visible here; presumably it translates the
// registers to memory order - confirm. Also confirm that its second argument
// accepts a VectorSize, since another call site passes a plain element count (4).
void IRFrontend::GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg) {
::GetVectorRegs(regs, N, vectorReg);
ApplyVoffset(regs, N);
}
// Fills regs with the register numbers that make up matrix matrixReg of size N,
// exactly as returned by the global ::GetMatrixRegs helper. Unlike the vector
// variant, no further translation is applied to the result yet.
void IRFrontend::GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg) {
::GetMatrixRegs(regs, N, matrixReg);
// TODO (presumably the same register translation GetVectorRegs performs - confirm)
}
// Resolves the registers for an S operand (vector vectorReg of size sz) with the
// current S prefix applied.
// Asserts that the S prefix is flagged PREFIX_KNOWN. The raw register numbers are
// handed to ApplyPrefixST together with js.prefixS; IRVTEMP_PFX_S is passed as the
// base of the temp registers ApplyPrefixST may substitute into regs.
// NOTE(review): this calls the global ::GetVectorRegs rather than the member
// GetVectorRegs, so ApplyVoffset is not applied here - confirm that is intended.
void IRFrontend::GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
::GetVectorRegs(regs, sz, vectorReg);
ApplyPrefixST(regs, js.prefixS, sz, IRVTEMP_PFX_S);
}
// Resolves the registers for a T operand (vector vectorReg of size sz) with the
// current T prefix applied.
// Asserts that the T prefix is flagged PREFIX_KNOWN. The raw register numbers are
// handed to ApplyPrefixST together with js.prefixT; IRVTEMP_PFX_T is passed as the
// base of the temp registers ApplyPrefixST may substitute into regs.
// NOTE(review): this calls the global ::GetVectorRegs rather than the member
// GetVectorRegs, so ApplyVoffset is not applied here - confirm that is intended.
void IRFrontend::GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
::GetVectorRegs(regs, sz, vectorReg);
ApplyPrefixST(regs, js.prefixT, sz, IRVTEMP_PFX_T);
}
void IRFrontend::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixDFlag & JitState::PREFIX_KNOWN);
GetVectorRegs(regs, sz, vectorReg);
int n = GetNumVectorElements(sz);
if (js.prefixD == 0)
return;
int n = GetNumVectorElements(sz);
for (int i = 0; i < n; i++) {
// Hopefully this is rare, we'll just write it into a reg we drop.
// Hopefully this is rare, we'll just write it into a dumping ground reg.
if (js.VfpuWriteMask(i))
regs[i] = fpr.GetTempV();
regs[i] = IRVTEMP_PFX_D + i;
}
}
@ -171,13 +189,12 @@ namespace MIPSComp {
for (int i = 0; i < n; i++) {
if (js.VfpuWriteMask(i))
continue;
int sat = (js.prefixD >> (i * 2)) & 3;
int sat = GetDSat(js.prefixD, i);
if (sat == 1) {
// clamped = x < 0 ? 0 : (x > 1 ? 1 : x), i.e. saturate to [0, 1]
ir.Write(IROp::FSat0_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]);
ir.Write(IROp::FSat0_1, vregs[i], vregs[i]);
} else if (sat == 3) {
ir.Write(IROp::FSatMinus1_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]);
ir.Write(IROp::FSatMinus1_1, vregs[i], vregs[i]);
}
}
}
@ -207,7 +224,6 @@ namespace MIPSComp {
u8 vregs[4];
GetVectorRegs(vregs, V_Quad, vt);
ApplyVoffset(vregs, 4); // Translate to memory order
switch (op >> 26) {
case 54: //lv.q
@ -251,9 +267,11 @@ namespace MIPSComp {
if (sz == 4 && IsVectorColumn(vd)) {
u8 dregs[4];
GetVectorRegs(dregs, sz, vd);
ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
ir.Write(IROp::InitVec4, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
} else if (sz == 1) {
ir.Write(IROp::SetConstF, vfpuBase + voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
u8 dreg;
GetVectorRegs(&dreg, V_Single, vd);
ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
} else {
DISABLE;
}
@ -275,7 +293,7 @@ namespace MIPSComp {
GetVectorRegs(dregs, sz, vd);
int row = vd & 3;
Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)init);
ir.Write(IROp::InitVec4, dregs[0], (int)init);
}
void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
@ -311,7 +329,7 @@ namespace MIPSComp {
default:
return;
}
ir.Write(IROp::InitVec4, vfpuBase + voffset[vec[0]], (int)init);
ir.Write(IROp::InitVec4, vec[0], (int)init);
}
return;
}
@ -440,12 +458,14 @@ namespace MIPSComp {
}
void IRFrontend::Comp_Viim(MIPSOpcode op) {
if (!js.HasNoPrefix())
if (!js.HasUnknownPrefix())
DISABLE;
u8 dreg = _VT;
s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
ir.Write(IROp::SetConstF, vfpuBase + voffset[dreg], ir.AddConstantFloat((float)imm));
u8 dreg;
GetVectorRegsPrefixD(&dreg, V_Single, _VT);
ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat((float)imm));
ApplyPrefixD(&dreg, V_Single);
}
void IRFrontend::Comp_Vfim(MIPSOpcode op) {

View File

@ -115,19 +115,13 @@ private:
void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa);
void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst);
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg);
void ApplyPrefixD(const u8 *vregs, VectorSize sz);
void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
GetVectorRegs(regs, sz, vectorReg);
ApplyPrefixST(regs, js.prefixS, sz);
}
void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
GetVectorRegs(regs, sz, vectorReg);
ApplyPrefixST(regs, js.prefixT, sz);
}
void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg);
void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg);
void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
void GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg);
void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg);
// Utils
void Comp_ITypeMemLR(MIPSOpcode op, bool load);

View File

@ -70,6 +70,12 @@ static const IRMeta irMeta[] = {
{ IROp::FDiv, "FDiv", "FFF" },
{ IROp::FMov, "FMov", "FF" },
{ IROp::FSqrt, "FSqrt", "FF" },
{ IROp::FSin, "FSin", "FF" },
{ IROp::FCos, "FCos", "FF" },
{ IROp::FSqrt, "FSqrt", "FF" },
{ IROp::FRSqrt, "FRSqrt", "FF" },
{ IROp::FRecip, "FRecip", "FF" },
{ IROp::FAsin, "FAsin", "FF" },
{ IROp::FNeg, "FNeg", "FF" },
{ IROp::FAbs, "FAbs", "FF" },
{ IROp::FRound, "FRound", "FF" },
@ -82,17 +88,12 @@ static const IRMeta irMeta[] = {
{ IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" },
{ IROp::FMovFromGPR, "FMovFromGPR", "FG" },
{ IROp::FMovToGPR, "FMovToGPR", "GF" },
{ IROp::InitVec4, "InitVec4", "Fv"},
{ IROp::FpCondToReg, "FpCondToReg", "G" },
{ IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
{ IROp::FSin, "FSin", "FF" },
{ IROp::FCos, "FCos", "FF" },
{ IROp::FSqrt, "FSqrt", "FF" },
{ IROp::FRSqrt, "FRSqrt", "FF" },
{ IROp::FRecip, "FRecip", "FF" },
{ IROp::FAsin, "FAsin", "FF" },
{ IROp::InitVec4, "InitVec4", "Fv" },
{ IROp::ShuffleVec4, "ShuffleVec4", "FFs" },
{ IROp::Interpret, "Interpret", "_C" },
{ IROp::Downcount, "Downcount", "_II" },
@ -192,6 +193,7 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
"[0 0 1 0]",
"[0 0 0 1]",
};
static const char *xyzw = "xyzw";
switch (type) {
case 'G':
@ -216,6 +218,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
case 'v':
snprintf(buf, bufSize, "%s", initVec4Names[param]);
break;
case 's':
	// Swizzle parameter: four 2-bit lane selectors packed into param, lowest bits
	// first, each printed as the matching letter from "xyzw".
	// xyzw[...] is a single char, so it must be formatted with %c; handing a char
	// to %s makes snprintf treat it as a pointer, which is undefined behavior.
	snprintf(buf, bufSize, "%c%c%c%c", xyzw[param & 3], xyzw[(param >> 2) & 3], xyzw[(param >> 4) & 3], xyzw[(param >> 6) & 3]);
	break;
case '_':
case '\0':
buf[0] = 0;

View File

@ -142,7 +142,11 @@ enum class IROp : u8 {
SetCtrlVFPU,
// 4-wide instructions to assist SIMD.
// Can of course add a pass to break them up if a target does not
// support SIMD.
InitVec4,
ShuffleVec4,
// Slow special functions. Used on singles.
FSin,
@ -232,16 +236,21 @@ enum {
IRTEMP_LHS, // Reserved for use in branches
IRTEMP_RHS, // Reserved for use in branches
IRVTEMP_PFX_S = 224 - 32, // Relative to the FP regs
IRVTEMP_PFX_T = 228 - 32,
IRVTEMP_PFX_D = 232 - 32,
IRVTEMP_0 = 236 - 32,
// 16 float temps for vector S and T prefixes and things like that.
// IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0]
// Hacky way to get to other state
IRREG_VFPU_CTRL_BASE = 208,
IRREG_VFPU_CC = 211,
IRREG_LO = 226, // offset of lo in MIPSState / 4
IRREG_HI = 227,
IRREG_FCR31 = 228,
IRREG_FPCOND = 229,
IRREG_LO = 242, // offset of lo in MIPSState / 4
IRREG_HI = 243,
IRREG_FCR31 = 244,
IRREG_FPCOND = 245,
};
struct IRMeta {

View File

@ -144,6 +144,15 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
#endif
break;
case IROp::ShuffleVec4:
{
	// Writes four consecutive floats starting at dest, each picked from the four
	// consecutive floats starting at src1. src2 packs four 2-bit lane selectors:
	// bits (2*i)..(2*i+1) choose the source lane for output lane i.
	// Can't use the SSE shuffle here because it takes an immediate.
	// Backends with SSE support could use that though.
	// Gather into a temporary first: when dest overlaps src1 (e.g. an in-place
	// shuffle), writing directly would clobber lanes that are still to be read.
	float shuffled[4];
	for (int i = 0; i < 4; i++)
		shuffled[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
	for (int i = 0; i < 4; i++)
		mips->f[inst->dest + i] = shuffled[i];
	break;
}
case IROp::FSin:
mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
break;

View File

@ -172,23 +172,24 @@ public:
// However, the IR interpreter needs some temps that can stick around between ops.
// Can be indexed through r[] using indices 192+.
u32 t[16]; //192
// float vt[16]; //208 TODO: VFPU temp
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
u32 vfpuCtrl[16]; // 208
float vt[16]; //224 TODO: VFPU temp
// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
u32 padLoHi; // 224
u32 padLoHi; // 240
union {
struct {
u32 pc; //225
u32 pc; //241
u32 lo; //226
u32 hi; //227
u32 lo; //242
u32 hi; //243
u32 fcr31; //fpu control register
u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23)
u32 fcr31; //244 fpu control register
u32 fpcond; //245 cache the cond flag of fcr31 (& 1 << 23)
};
u32 other[6];
};