Prefix prep

2024-11-28 02:41:18 +00:00 · 2016-05-11 00:16:07 +02:00 · 2016-05-11 00:16:07 +02:00 · 219548b8e2
commit 219548b8e2
parent b3dd36982f
6 changed files with 96 additions and 58 deletions
--- a/Core/MIPS/IR/IRCompVFPU.cpp
+++ b/Core/MIPS/IR/IRCompVFPU.cpp
@ -88,7 +88,7 @@ namespace MIPSComp {
 		}
 	}

-	void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz) {
+	void IRFrontend::ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg) {
 		if (prefix == 0xE4)
 			return;

@ -109,13 +109,9 @@ namespace MIPSComp {
 			if (!constants && regnum == i && !abs && !negate)
 				continue;

-			/*
 			// This puts the value into a temp reg, so we won't write the modified value back.
-			vregs[i] = fpr.GetTempV();
+			vregs[i] = tempReg + i;
 			if (!constants) {
-				fpr.MapDirtyInV(vregs[i], origV[regnum]);
-				fpr.SpillLockV(vregs[i]);
-
 				// Prefix may say "z, z, z, z" but if this is a pair, we force to x.
 				// TODO: But some ops seem to use const 0 instead?
 				if (regnum >= n) {
@ -124,36 +120,58 @@ namespace MIPSComp {
 				}

 				if (abs) {
-					fp.FABS(fpr.V(vregs[i]), fpr.V(origV[regnum]));
+					ir.Write(IROp::FAbs, vregs[i], origV[regnum]);
 					if (negate)
-						fp.FNEG(fpr.V(vregs[i]), fpr.V(vregs[i]));
+						ir.Write(IROp::FNeg, vregs[i], vregs[i]);
 				} else {
 					if (negate)
-						fp.FNEG(fpr.V(vregs[i]), fpr.V(origV[regnum]));
+						ir.Write(IROp::FNeg, vregs[i], origV[regnum]);
 					else
-						fp.FMOV(fpr.V(vregs[i]), fpr.V(origV[regnum]));
+						ir.Write(IROp::FMov, vregs[i], origV[regnum]);
 				}
 			} else {
-				fpr.MapRegV(vregs[i], MAP_DIRTY | MAP_NOINIT);
-				fpr.SpillLockV(vregs[i]);
-				fp.MOVI2F(fpr.V(vregs[i]), constantArray[regnum + (abs << 2)], SCRATCH1, (bool)negate);
+				if (negate) {
+					ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(-constantArray[regnum + (abs << 2)]));
+				} else {
+					ir.Write(IROp::SetConstF, vregs[i], ir.AddConstantFloat(constantArray[regnum + (abs << 2)]));
+				}
 			}
-			*/
 		}
 	}

+	void IRFrontend::GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg) {
+		::GetVectorRegs(regs, N, vectorReg);
+		ApplyVoffset(regs, N);
+	}
+
+	void IRFrontend::GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg) {
+		::GetMatrixRegs(regs, N, matrixReg);
+		// TODO
+	}
+
+	void IRFrontend::GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
+		_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
+		::GetVectorRegs(regs, sz, vectorReg);
+		ApplyPrefixST(regs, js.prefixS, sz, IRVTEMP_PFX_S);
+	}
+	void IRFrontend::GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
+		_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
+		::GetVectorRegs(regs, sz, vectorReg);
+		ApplyPrefixST(regs, js.prefixT, sz, IRVTEMP_PFX_T);
+	}
+
 	void IRFrontend::GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg) {
 		_assert_(js.prefixDFlag & JitState::PREFIX_KNOWN);

 		GetVectorRegs(regs, sz, vectorReg);
+		int n = GetNumVectorElements(sz);
 		if (js.prefixD == 0)
 			return;

-		int n = GetNumVectorElements(sz);
 		for (int i = 0; i < n; i++) {
-			// Hopefully this is rare, we'll just write it into a reg we drop.
+			// Hopefully this is rare, we'll just write it into a dumping ground reg.
 			if (js.VfpuWriteMask(i))
-				regs[i] = fpr.GetTempV();
+				regs[i] = IRVTEMP_PFX_D + i;
 		}
 	}

@ -171,13 +189,12 @@ namespace MIPSComp {
 		for (int i = 0; i < n; i++) {
 			if (js.VfpuWriteMask(i))
 				continue;
-
-			int sat = (js.prefixD >> (i * 2)) & 3;
+			int sat = GetDSat(js.prefixD, i);
 			if (sat == 1) {
-				// clamped = x < 0 ? (x > 1 ? 1 : x) : x [0, 1]
+				// clamped = x < 0 ? 0 : (x > 1 ? 1 : x)   [0, 1]
-				ir.Write(IROp::FSat0_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]);
+				ir.Write(IROp::FSat0_1, vregs[i], vregs[i]);
 			} else if (sat == 3) {
-				ir.Write(IROp::FSatMinus1_1, vfpuBase + voffset[vregs[i]], vfpuBase + voffset[vregs[i]]);
+				ir.Write(IROp::FSatMinus1_1, vregs[i], vregs[i]);
 			}
 		}
 	}
@ -207,7 +224,6 @@ namespace MIPSComp {

 		u8 vregs[4];
 		GetVectorRegs(vregs, V_Quad, vt);
-		ApplyVoffset(vregs, 4);  // Translate to memory order

 		switch (op >> 26) {
 		case 54: //lv.q
@ -251,9 +267,11 @@ namespace MIPSComp {
 		if (sz == 4 && IsVectorColumn(vd)) {
 			u8 dregs[4];
 			GetVectorRegs(dregs, sz, vd);
-			ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
+			ir.Write(IROp::InitVec4, dregs[0], (int)(type == 6 ? Vec4Init::AllZERO : Vec4Init::AllONE));
 		} else if (sz == 1) {
-			ir.Write(IROp::SetConstF, vfpuBase + voffset[vd], ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
+			u8 dreg;
+			GetVectorRegs(&dreg, V_Single, vd);
+			ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat(type == 6 ? 0.0f : 1.0f));
 		} else {
 			DISABLE;
 		}
@ -275,7 +293,7 @@ namespace MIPSComp {
 		GetVectorRegs(dregs, sz, vd);
 		int row = vd & 3;
 		Vec4Init init = Vec4Init((int)Vec4Init::Set_1000 + row);
-		ir.Write(IROp::InitVec4, vfpuBase + voffset[dregs[0]], (int)init);
+		ir.Write(IROp::InitVec4, dregs[0], (int)init);
 	}

 	void IRFrontend::Comp_VMatrixInit(MIPSOpcode op) {
@ -311,7 +329,7 @@ namespace MIPSComp {
 			default:
 				return;
 			}
-			ir.Write(IROp::InitVec4, vfpuBase + voffset[vec[0]], (int)init);
+			ir.Write(IROp::InitVec4, vec[0], (int)init);
 		}
 		return;
 	}
@ -440,12 +458,14 @@ namespace MIPSComp {
 	}

 	void IRFrontend::Comp_Viim(MIPSOpcode op) {
-		if (!js.HasNoPrefix())
+		// Writes the sign-extended 16-bit immediate, converted to float, into the
+		// single destination register, then applies the D prefix to it.
+		// The D prefix is resolved at compile time below (GetVectorRegsPrefixD asserts
+		// PREFIX_KNOWN), so bail out when any prefix is unknown - not when all are known.
+		if (js.HasUnknownPrefix())
 			DISABLE;

-		u8 dreg = _VT;
 		s32 imm = (s32)(s16)(u16)(op & 0xFFFF);
-		ir.Write(IROp::SetConstF, vfpuBase + voffset[dreg], ir.AddConstantFloat((float)imm));
+		u8 dreg;
+		GetVectorRegsPrefixD(&dreg, V_Single, _VT);
+		ir.Write(IROp::SetConstF, dreg, ir.AddConstantFloat((float)imm));
+		ApplyPrefixD(&dreg, V_Single);
 	}

 	void IRFrontend::Comp_Vfim(MIPSOpcode op) {
--- a/Core/MIPS/IR/IRFrontend.h
+++ b/Core/MIPS/IR/IRFrontend.h
@ -115,19 +115,13 @@ private:
 	void CompShiftImm(MIPSOpcode op, IROp shiftType, int sa);
 	void CompShiftVar(MIPSOpcode op, IROp shiftType, IROp shiftTypeConst);

-	void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz);
+	void ApplyPrefixST(u8 *vregs, u32 prefix, VectorSize sz, int tempReg);
 	void ApplyPrefixD(const u8 *vregs, VectorSize sz);
-	void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg) {
-		_assert_(js.prefixSFlag & JitState::PREFIX_KNOWN);
-		GetVectorRegs(regs, sz, vectorReg);
-		ApplyPrefixST(regs, js.prefixS, sz);
-	}
-	void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg) {
-		_assert_(js.prefixTFlag & JitState::PREFIX_KNOWN);
-		GetVectorRegs(regs, sz, vectorReg);
-		ApplyPrefixST(regs, js.prefixT, sz);
-	}
+	void GetVectorRegsPrefixS(u8 *regs, VectorSize sz, int vectorReg);
+	void GetVectorRegsPrefixT(u8 *regs, VectorSize sz, int vectorReg);
 	void GetVectorRegsPrefixD(u8 *regs, VectorSize sz, int vectorReg);
+	void GetVectorRegs(u8 regs[4], VectorSize N, int vectorReg);
+	void GetMatrixRegs(u8 regs[16], MatrixSize N, int matrixReg);

 	// Utils
 	void Comp_ITypeMemLR(MIPSOpcode op, bool load);
--- a/Core/MIPS/IR/IRInst.cpp
+++ b/Core/MIPS/IR/IRInst.cpp
@ -70,6 +70,12 @@ static const IRMeta irMeta[] = {
 	{ IROp::FDiv, "FDiv", "FFF" },
 	{ IROp::FMov, "FMov", "FF" },
 	{ IROp::FSqrt, "FSqrt", "FF" },
+	{ IROp::FSin, "FSin", "FF" },
+	{ IROp::FCos, "FCos", "FF" },
+	{ IROp::FSqrt, "FSqrt", "FF" },
+	{ IROp::FRSqrt, "FRSqrt", "FF" },
+	{ IROp::FRecip, "FRecip", "FF" },
+	{ IROp::FAsin, "FAsin", "FF" },
 	{ IROp::FNeg, "FNeg", "FF" },
 	{ IROp::FAbs, "FAbs", "FF" },
 	{ IROp::FRound, "FRound", "FF" },
@ -82,17 +88,12 @@ static const IRMeta irMeta[] = {
 	{ IROp::FSatMinus1_1, "FSat(-1 - 1)", "FF" },
 	{ IROp::FMovFromGPR, "FMovFromGPR", "FG" },
 	{ IROp::FMovToGPR, "FMovToGPR", "GF" },
-	{ IROp::InitVec4, "InitVec4", "Fv"},
 	{ IROp::FpCondToReg, "FpCondToReg", "G" },
 	{ IROp::VfpuCtrlToReg, "VfpuCtrlToReg", "GI" },
 	{ IROp::SetCtrlVFPU, "SetCtrlVFPU", "TC" },

-	{ IROp::FSin, "FSin", "FF" },
-	{ IROp::FCos, "FCos", "FF" },
-	{ IROp::FSqrt, "FSqrt", "FF" },
-	{ IROp::FRSqrt, "FRSqrt", "FF" },
-	{ IROp::FRecip, "FRecip", "FF" },
-	{ IROp::FAsin, "FAsin", "FF" },
+	{ IROp::InitVec4, "InitVec4", "Fv" },
+	{ IROp::ShuffleVec4, "ShuffleVec4", "FFs" },

 	{ IROp::Interpret, "Interpret", "_C" },
 	{ IROp::Downcount, "Downcount", "_II" },
@ -192,6 +193,7 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
 		"[0 0 1 0]",
 		"[0 0 0 1]",
 	};
+	static const char *xyzw = "xyzw";

 	switch (type) {
 	case 'G':
@ -216,6 +218,9 @@ void DisassembleParam(char *buf, int bufSize, u8 param, char type, const u32 *co
 	case 'v':
 		snprintf(buf, bufSize, "%s", initVec4Names[param]);
 		break;
+	case 's':
+		// Shuffle mask: four 2-bit lane selectors, lowest bits first, shown as x/y/z/w.
+		// xyzw[] yields single chars, so each must be printed with %c; passing a char
+		// to %s is undefined behaviour (it would be read as a pointer).
+		snprintf(buf, bufSize, "%c%c%c%c", xyzw[param & 3], xyzw[(param >> 2) & 3], xyzw[(param >> 4) & 3], xyzw[(param >> 6) & 3]);
+		break;
 	case '_':
 	case '\0':
 		buf[0] = 0;
--- a/Core/MIPS/IR/IRInst.h
+++ b/Core/MIPS/IR/IRInst.h
@ -142,7 +142,11 @@ enum class IROp : u8 {

 	SetCtrlVFPU,

+	// 4-wide instructions to assist SIMD.
+	// Can of course add a pass to break them up if a target does not
+	// support SIMD.
 	InitVec4,
+	ShuffleVec4,

 	// Slow special functions. Used on singles.
 	FSin,
@ -232,16 +236,21 @@ enum {
 	IRTEMP_LHS,  // Reserved for use in branches
 	IRTEMP_RHS,  // Reserved for use in branches

+	IRVTEMP_PFX_S = 224 - 32,  // Relative to the FP regs
+	IRVTEMP_PFX_T = 228 - 32,
+	IRVTEMP_PFX_D = 232 - 32,
+	IRVTEMP_0 = 236 - 32,
+
 	// 16 float temps for vector S and T prefixes and things like that.
 	// IRVTEMP_0 = 208 - 64,  // -64 to be relative to v[0]

 	// Hacky way to get to other state
 	IRREG_VFPU_CTRL_BASE = 208,
 	IRREG_VFPU_CC = 211,
-	IRREG_LO = 226,  // offset of lo in MIPSState / 4
-	IRREG_HI = 227,
-	IRREG_FCR31 = 228,
-	IRREG_FPCOND = 229,
+	IRREG_LO = 242,  // offset of lo in MIPSState / 4
+	IRREG_HI = 243,
+	IRREG_FCR31 = 244,
+	IRREG_FPCOND = 245,
 };

 struct IRMeta {
--- a/Core/MIPS/IR/IRInterpreter.cpp
+++ b/Core/MIPS/IR/IRInterpreter.cpp
@ -144,6 +144,15 @@ u32 IRInterpret(MIPSState *mips, const IRInst *inst, const u32 *constPool, int c
 #endif
 			break;

+		case IROp::ShuffleVec4:
+		{
+			// Can't use the SSE shuffle here because it takes an immediate.
+			// Backends with SSE support could use that though.
+			// Each 2-bit field of src2 picks which src1 lane feeds dest lane i.
+			// Gather into a temporary first: if dest overlaps src1 (e.g. an in-place
+			// swizzle), writing lanes directly would clobber sources not yet read.
+			float shuffled[4];
+			for (int i = 0; i < 4; i++)
+				shuffled[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
+			for (int i = 0; i < 4; i++)
+				mips->f[inst->dest + i] = shuffled[i];
+			break;
+		}
+
 		case IROp::FSin:
 			mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
 			break;
--- a/Core/MIPS/MIPS.h
+++ b/Core/MIPS/MIPS.h
@ -172,23 +172,24 @@ public:
 	// However, the IR interpreter needs some temps that can stick around between ops.
 	// Can be indexed through r[] using indices 192+.
 	u32 t[16];     //192
-	// float vt[16];  //208  TODO: VFPU temp

 	// If vfpuCtrl (prefixes) get mysterious values, check the VFPU regcache code.
 	u32 vfpuCtrl[16]; // 208

+	float vt[16];  //224  TODO: VFPU temp
+
 	// ARM64 wants lo/hi to be aligned to 64 bits from the base of this struct.
-	u32 padLoHi;    // 224
+	u32 padLoHi;    // 240

 	union {
 		struct {
-			u32 pc;   //225
+			u32 pc;   //241

-			u32 lo;   //226
-			u32 hi;   //227
+			u32 lo;   //242
+			u32 hi;   //243

-			u32 fcr31; //fpu control register
-			u32 fpcond;  // cache the cond flag of fcr31  (& 1 << 23)
+			u32 fcr31; //244 fpu control register
+			u32 fpcond;  //245 cache the cond flag of fcr31  (& 1 << 23)
 		};
 		u32 other[6];
 	};