mirror of
https://github.com/libretro/ppsspp.git
synced 2024-11-24 08:39:51 +00:00
Prefix prep
This commit is contained in:
parent
b3dd36982f
commit
219548b8e2
@ -88,7 +88,7 @@ namespace MIPSComp {
|
||||
}
|
||||
}
|
||||
|
||||
void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
|
||||
void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg) {
|
||||
if (prefix == 0xE4)
|
||||
return;
|
||||
|
||||
@ -109,13 +109,9 @@ namespace MIPSComp {
|
||||
if (!constants && regnum == i && !abs && !negate)
|
||||
continue;
|
||||
|
||||
/*
|
||||
// This puts the value into a temp reg, so we won't write the modified value back.
|
||||
vregs[i] = fpr.GetTempV();
|
||||
vregs[i] = tempReg + i;
|
||||
if (!constants) {
|
||||
fpr.MapDirtyInV(vregs[i], origV[regnum]);
|
||||
fpr.SpillLockV(vregs[i]);
|
||||
|
||||
// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
|
||||
// TODO: But some ops seem to use const 0 instead?
|
||||
if (regnum >= n) {
|
||||
@ -124,36 +120,58 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
if (abs) {
|
||||
fp.FABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
|
||||
ir.Write(IROp::FAbs, vregs[i], origV[regnum]);
|
||||
if (negate)
|
||||
fp.FNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
|
||||
ir.Write(IROp::FNeg, vregs[i], vregs[i]);
|
||||
} else {
|
||||
if (negate)
|
||||
fp.FNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
|
||||
ir.Write(IROp::FNeg, vregs[i], origV[regnum]);
|
||||
else
|
||||
fp.FMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
|
||||
ir.Write(IROp::FMov, vregs[i], origV[regnum]);
|
||||
}
|
||||
} else {
|
||||
fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
|
||||
fpr.SpillLockV(vregs[i]);
|
||||
fp.MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs << 2)], SCRATCH1, (bool)negate);
|
||||
if (negate) {
|
||||
ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(-constantArray[regnum + (abs << 2)]));
|
||||
} else {
|
||||
ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(constantArray[regnum + (abs << 2)]));
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
||||
// Member wrapper around the global ::GetVectorRegs: fills `regs` for vector
// register `vectorReg` at size `N`, then post-processes the result in place
// with ApplyVoffset(regs, N).
// NOTE(review): ApplyVoffset's definition is not visible here; a call site
// elsewhere in this file annotates it as "Translate to memory order" - confirm.
void IRFrontend::GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg) {
|
||||
// Explicit :: so the global helper is called, not this member (no recursion).
::GetVectorRegs(regs, N, vectorReg);
|
||||
ApplyVoffset(regs, N);
|
||||
}
|
||||
|
||||
// Member wrapper around the global ::GetMatrixRegs: fills `regs` for matrix
// register `matrixReg` at size `N`. No further processing is applied yet.
void IRFrontend::GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg) {
|
||||
// Explicit :: so the global helper is called, not this member (no recursion).
::GetMatrixRegs(regs, N, matrixReg);
|
||||
// TODO
// NOTE(review): presumably the TODO is the missing equivalent of the
// ApplyVoffset step that the vector variant performs - confirm with author.
|
||||
}
|
||||
|
||||
// Resolves the registers for an S-operand vector and applies the S prefix.
// Requires the S prefix to be known (asserted). Registers come from the
// global ::GetVectorRegs (not the member wrapper), after which ApplyPrefixST
// is invoked with js.prefixS and IRVTEMP_PFX_S as its tempReg argument.
// NOTE(review): presumably ApplyPrefixST redirects modified lanes into temps
// starting at tempReg; its body is not fully visible here - confirm.
void IRFrontend::GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
|
||||
_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
|
||||
::GetVectorRegs(regs, sz, vectorReg);
|
||||
ApplyPrefixST(regs, js.prefixS, sz, IRVTEMP_PFX_S);
|
||||
}
|
||||
// Resolves the registers for a T-operand vector and applies the T prefix.
// Requires the T prefix to be known (asserted). Registers come from the
// global ::GetVectorRegs (not the member wrapper), after which ApplyPrefixST
// is invoked with js.prefixT and IRVTEMP_PFX_T as its tempReg argument.
// NOTE(review): presumably ApplyPrefixST redirects modified lanes into temps
// starting at tempReg; its body is not fully visible here - confirm.
void IRFrontend::GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
|
||||
_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
|
||||
::GetVectorRegs(regs, sz, vectorReg);
|
||||
ApplyPrefixST(regs, js.prefixT, sz, IRVTEMP_PFX_T);
|
||||
}
|
||||
|
||||
// Resolves the destination registers for a vector op, taking the D prefix
// write mask into account. Requires the D prefix to be known (asserted).
// Calls the member GetVectorRegs, returns early when js.prefixD == 0, and
// otherwise replaces entries of `regs` for lanes where js.VfpuWriteMask(i)
// is set.
// NOTE(review): this span appears to contain both the pre-change and the
// post-change lines of a diff: `int n` is declared twice and `regs[i]` is
// assigned twice (fpr.GetTempV() and IRVTEMP_PFX_D + i). As written the second
// assignment is outside the `if`. Confirm which lines are live before editing.
void IRFrontend::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) {
|
||||
_assert_(js.prefixDFlag & JitState::PREFIX_KNOWN);
|
||||
|
||||
GetVectorRegs(regs, sz, vectorReg);
|
||||
int n = GetNumVectorElements(sz);
|
||||
// A zero D prefix leaves the register list untouched.
if (js.prefixD == 0)
|
||||
return;
|
||||
|
||||
int n = GetNumVectorElements(sz);
|
||||
for (int i = 0; i < n; i++) {
|
||||
// Hopefully this is rare, we'll just write it into a reg we drop.
|
||||
// Hopefully this is rare, we'll just write it into a dumping ground reg.
|
||||
if (js.VfpuWriteMask(i))
|
||||
regs[i] = fpr.GetTempV();
|
||||
regs[i] = IRVTEMP_PFX_D + i;
|
||||
}
|
||||
}
|
||||
|
||||
@ -171,13 +189,12 @@ namespace MIPSComp {
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (js.VfpuWriteMask(i))
|
||||
continue;
|
||||
|
||||
int sat = (js.prefixD >> (i * 2)) & 3;
|
||||
int sat = GetDSat(js.prefixD, i);
|
||||
if (sat == 1) {
|
||||
// clamped = x < 0 ? (x > 1 ? 1 : x) : x [0, 1]
|
||||
ir.Write(IROp::FSat0_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]);
|
||||
ir.Write(IROp::FSat0_1, vregs[i], vregs[i]);
|
||||
} else if (sat == 3) {
|
||||
ir.Write(IROp::FSatMinus1_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]);
|
||||
ir.Write(IROp::FSatMinus1_1, vregs[i], vregs[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -207,7 +224,6 @@ namespace MIPSComp {
|
||||
|
||||
u8 vregs[4];
|
||||
GetVectorRegs(vregs, V_Quad, vt);
|
||||
ApplyVoffset(vregs, 4); // Translate to memory order
|
||||
|
||||
switch (op >> 26) {
|
||||
case 54: //lv.q
|
||||
@ -251,9 +267,11 @@ namespace MIPSComp {
|
||||
if (sz == 4 && IsVectorColumn(vd)) {
|
||||
u8 dregs[4];
|
||||
GetVectorRegs(dregs, sz, vd);
|
||||
ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
|
||||
ir.Write(IROp::InitVec4, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
|
||||
} else if (sz == 1) {
|
||||
ir.Write(IROp::SetConstF, vfpuBase + voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
|
||||
u8 dreg;
|
||||
GetVectorRegs(&dreg, V_Single, vd);
|
||||
ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
|
||||
} else {
|
||||
DISABLE;
|
||||
}
|
||||
@ -275,7 +293,7 @@ namespace MIPSComp {
|
||||
GetVectorRegs(dregs, sz, vd);
|
||||
int row = vd & 3;
|
||||
Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
|
||||
ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)init);
|
||||
ir.Write(IROp::InitVec4, dregs[0], (int)init);
|
||||
}
|
||||
|
||||
void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
|
||||
@ -311,7 +329,7 @@ namespace MIPSComp {
|
||||
default:
|
||||
return;
|
||||
}
|
||||
ir.Write(IROp::InitVec4, vfpuBase + voffset[vec[0]], (int)init);
|
||||
ir.Write(IROp::InitVec4, vec[0], (int)init);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@ -440,12 +458,14 @@ namespace MIPSComp {
|
||||
}
|
||||
|
||||
// Compiles an op that loads a 16-bit signed immediate into a single VFPU
// register as a float: the low 16 bits of `op` are sign-extended to s32,
// converted to float, and emitted as an IROp::SetConstF.
// NOTE(review): this span appears to contain both the pre-change and the
// post-change lines of a diff: two prefix guards, two declarations of `dreg`,
// and two SetConstF writes. Confirm which lines are live before editing.
void IRFrontend::Comp_Viim(MIPSOpcode op) {
|
||||
if (!js.HasNoPrefix())
|
||||
// NOTE(review): this disables compilation when the prefix is NOT unknown,
// which looks inverted for a guard ahead of GetVectorRegsPrefixD (that
// function asserts the D prefix is known) - confirm.
if (!js.HasUnknownPrefix())
|
||||
DISABLE;
|
||||
|
||||
u8 dreg = _VT;
|
||||
// Sign-extend the 16-bit immediate field.
s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
|
||||
ir.Write(IROp::SetConstF, vfpuBase + voffset[dreg], ir.AddConstantFloat((float)imm));
|
||||
u8 dreg;
|
||||
GetVectorRegsPrefixD(&dreg, V_Single, _VT);
|
||||
ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat((float)imm));
|
||||
ApplyPrefixD(&dreg, V_Single);
|
||||
}
|
||||
|
||||
void IRFrontend::Comp_Vfim(MIPSOpcode op) {
|
||||
|
@ -115,19 +115,13 @@ private:
|
||||
void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa);
|
||||
void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst);
|
||||
|
||||
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
|
||||
void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg);
|
||||
void ApplyPrefixD(const u8 *vregs, VectorSize sz);
|
||||
// Inline form of the S-prefix register lookup: asserts the S prefix is known,
// fills `regs` via GetVectorRegs, then applies js.prefixS via ApplyPrefixST.
// NOTE(review): this calls ApplyPrefixST with three arguments, while a
// four-parameter declaration (with tempReg) and an out-of-line declaration of
// this same method also appear nearby - this looks like the removed side of a
// diff; confirm before relying on it.
void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
|
||||
_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
|
||||
GetVectorRegs(regs, sz, vectorReg);
|
||||
ApplyPrefixST(regs, js.prefixS, sz);
|
||||
}
|
||||
// Inline form of the T-prefix register lookup: asserts the T prefix is known,
// fills `regs` via GetVectorRegs, then applies js.prefixT via ApplyPrefixST.
// NOTE(review): this calls ApplyPrefixST with three arguments, while a
// four-parameter declaration (with tempReg) and an out-of-line declaration of
// this same method also appear nearby - this looks like the removed side of a
// diff; confirm before relying on it.
void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
|
||||
_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
|
||||
GetVectorRegs(regs, sz, vectorReg);
|
||||
ApplyPrefixST(regs, js.prefixT, sz);
|
||||
}
|
||||
void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg);
|
||||
void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg);
|
||||
void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
|
||||
void GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg);
|
||||
void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg);
|
||||
|
||||
// Utils
|
||||
void Comp_ITypeMemLR(MIPSOpcode op, bool load);
|
||||
|
@ -70,6 +70,12 @@ static const IRMeta irMeta[] = {
|
||||
{ IROp::FDiv, "FDiv", "FFF" },
|
||||
{ IROp::FMov, "FMov", "FF" },
|
||||
{ IROp::FSqrt, "FSqrt", "FF" },
|
||||
{ IROp::FSin, "FSin", "FF" },
|
||||
{ IROp::FCos, "FCos", "FF" },
|
||||
{ IROp::FSqrt, "FSqrt", "FF" },
|
||||
{ IROp::FRSqrt, "FRSqrt", "FF" },
|
||||
{ IROp::FRecip, "FRecip", "FF" },
|
||||
{ IROp::FAsin, "FAsin", "FF" },
|
||||
{ IROp::FNeg, "FNeg", "FF" },
|
||||
{ IROp::FAbs, "FAbs", "FF" },
|
||||
{ IROp::FRound, "FRound", "FF" },
|
||||
@ -82,17 +88,12 @@ static const IRMeta irMeta[] = {
|
||||
{ IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" },
|
||||
{ IROp::FMovFromGPR, "FMovFromGPR", "FG" },
|
||||
{ IROp::FMovToGPR, "FMovToGPR", "GF" },
|
||||
{ IROp::InitVec4, "InitVec4", "Fv"},
|
||||
{ IROp::FpCondToReg, "FpCondToReg", "G" },
|
||||
{ IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
|
||||
{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },
|
||||
|
||||
{ IROp::FSin, "FSin", "FF" },
|
||||
{ IROp::FCos, "FCos", "FF" },
|
||||
{ IROp::FSqrt, "FSqrt", "FF" },
|
||||
{ IROp::FRSqrt, "FRSqrt", "FF" },
|
||||
{ IROp::FRecip, "FRecip", "FF" },
|
||||
{ IROp::FAsin, "FAsin", "FF" },
|
||||
{ IROp::InitVec4, "InitVec4", "Fv" },
|
||||
{ IROp::ShuffleVec4, "ShuffleVec4", "FFs" },
|
||||
|
||||
{ IROp::Interpret, "Interpret", "_C" },
|
||||
{ IROp::Downcount, "Downcount", "_II" },
|
||||
@ -192,6 +193,7 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
|
||||
"[0 0 1 0]",
|
||||
"[0 0 0 1]",
|
||||
};
|
||||
static const char *xyzw = "xyzw";
|
||||
|
||||
switch (type) {
|
||||
case 'G':
|
||||
@ -216,6 +218,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
|
||||
case 'v':
|
||||
snprintf(buf, bufSize, "%s", initVec4Names[param]);
|
||||
break;
|
||||
case 's':
|
||||
snprintf(buf, bufSize, "%s%s%s%s", xyzw[param & 3], xyzw[(param >> 2) & 3], xyzw[(param >> 4) & 3], xyzw[(param >> 6) & 3]);
|
||||
break;
|
||||
case '_':
|
||||
case '\0':
|
||||
buf[0] = 0;
|
||||
|
@ -142,7 +142,11 @@ enum class IROp : u8 {
|
||||
|
||||
SetCtrlVFPU,
|
||||
|
||||
// 4-wide instructions to assist SIMD.
|
||||
// Can of course add a pass to break them up if a target does not
|
||||
// support SIMD.
|
||||
InitVec4,
|
||||
ShuffleVec4,
|
||||
|
||||
// Slow special functions. Used on singles.
|
||||
FSin,
|
||||
@ -232,16 +236,21 @@ enum {
|
||||
IRTEMP_LHS, // Reserved for use in branches
|
||||
IRTEMP_RHS, // Reserved for use in branches
|
||||
|
||||
IRVTEMP_PFX_S = 224 - 32, // Relative to the FP regs
|
||||
IRVTEMP_PFX_T = 228 - 32,
|
||||
IRVTEMP_PFX_D = 232 - 32,
|
||||
IRVTEMP_0 = 236 - 32,
|
||||
|
||||
// 16 float temps for vector S and T prefixes and things like that.
|
||||
// IRVTEMP_0 = 208 - 64, // -64 to be relative to v[0]
|
||||
|
||||
// Hacky way to get to other state
|
||||
IRREG_VFPU_CTRL_BASE = 208,
|
||||
IRREG_VFPU_CC = 211,
|
||||
IRREG_LO = 226, // offset of lo in MIPSState / 4
|
||||
IRREG_HI = 227,
|
||||
IRREG_FCR31 = 228,
|
||||
IRREG_FPCOND = 229,
|
||||
IRREG_LO = 242, // offset of lo in MIPSState / 4
|
||||
IRREG_HI = 243,
|
||||
IRREG_FCR31 = 244,
|
||||
IRREG_FPCOND = 245,
|
||||
};
|
||||
|
||||
struct IRMeta {
|
||||
|
@ -144,6 +144,15 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
|
||||
#endif
|
||||
break;
|
||||
|
||||
case IROp::ShuffleVec4:
|
||||
{
|
||||
// Can't use the SSE shuffle here because it takes an immediate.
|
||||
// Backends with SSE support could use that though.
|
||||
for (int i = 0; i < 4; i++)
|
||||
mips->f[inst->dest + i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
|
||||
break;
|
||||
}
|
||||
|
||||
case IROp::FSin:
|
||||
mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
|
||||
break;
|
||||
|
@ -172,23 +172,24 @@ public:
|
||||
// However, the IR interpreter needs some temps that can stick around between ops.
|
||||
// Can be indexed through r[] using indices 192+.
|
||||
u32 t[16]; //192
|
||||
// float vt[16]; //208 TODO: VFPU temp
|
||||
|
||||
// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
|
||||
u32 vfpuCtrl[16]; // 208
|
||||
|
||||
float vt[16]; //224 TODO: VFPU temp
|
||||
|
||||
// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
|
||||
u32 padLoHi; // 224
|
||||
u32 padLoHi; // 240
|
||||
|
||||
union {
|
||||
struct {
|
||||
u32 pc; //225
|
||||
u32 pc; //241
|
||||
|
||||
u32 lo; //226
|
||||
u32 hi; //227
|
||||
u32 lo; //242
|
||||
u32 hi; //243
|
||||
|
||||
u32 fcr31; //fpu control register
|
||||
u32 fpcond; // cache the cond flag of fcr31 (& 1 << 23)
|
||||
u32 fcr31; //244 fpu control register
|
||||
u32 fpcond; //245 cache the cond flag of fcr31 (& 1 << 23)
|
||||
};
|
||||
u32 other[6];
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user