From 42c231389322b34709d06545db0704c22df03005 Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets" <checkins@unknownbrackets.org>
Date: Sat, 23 Feb 2013 09:52:24 -0800
Subject: [PATCH 1/2] Initial implementation of vf2h.

Fixes Fat Princess and possibly other stuff.
---
 Core/MIPS/MIPSIntVFPU.cpp | 79 +++++++++++++++++++++++++++++----------
 1 file changed, 60 insertions(+), 19 deletions(-)

diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
index b04d2385c..574f2a2fe 100644
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@@ -620,8 +620,8 @@ namespace MIPSInt
 		int imm = (op >> 16) & 0x1f;
 		float mult = 1.0f/(float)(1 << imm);
 		VectorSize sz = GetVecSize(op);
-		ReadVector((float*)&s, sz, vs);
-		ApplySwizzleS((float*)&s, sz); //TODO: and the mask to kill everything but swizzle
+		ReadVector((float*)&s[0], sz, vs);
+		ApplySwizzleS((float*)&s[0], sz); //TODO: and the mask to kill everything but swizzle
 		for (int i = 0; i < GetNumVectorElements(sz); i++)
 		{
 			d[i] = (float)s[i] * mult;
@@ -642,6 +642,7 @@ namespace MIPSInt
 	};
 
 	// magic code from ryg: http://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
+	// See also SSE2 version: https://gist.github.com/rygorous/2144712
 	static FP32 half_to_float_fast5(FP16 h)
 	{
 		static const FP32 magic = { (127 + (127 - 15)) << 23 };
@@ -662,6 +663,42 @@ namespace MIPSInt
 		return fp.f;
 	}
 
+	// More magic code: https://gist.github.com/rygorous/2156668
+	static FP16 float_to_half_fast3(FP32 f)
+	{
+		static const FP32 f32infty = { 255 << 23 };
+		static const FP32 f16infty = { 31 << 23 };
+		static const FP32 magic = { 15 << 23 };
+		static const u32 sign_mask = 0x80000000u;
+		static const u32 round_mask = ~0xfffu;
+		FP16 o = { 0 };
+
+		u32 sign = f.u & sign_mask;
+		f.u ^= sign;
+
+		if (f.u >= f32infty.u) // Inf or NaN (all exponent bits set)
+			 o.u = (f.u > f32infty.u) ? 0x7e00 : 0x7c00; // NaN->qNaN and Inf->Inf
+		else // (De)normalized number or zero
+		{
+			f.u &= round_mask;
+			f.f *= magic.f;
+			f.u -= round_mask;
+			if (f.u > f16infty.u) f.u = f16infty.u; // Clamp to signed infinity if overflowed
+
+			 o.u = f.u >> 13; // Take the bits!
+		}
+
+		o.u |= sign >> 16;
+		return o;
+	}
+
+	static u16 ShrinkToHalf(float full) {
+		FP32 fp32;
+		fp32.f = full;
+		FP16 fp = float_to_half_fast3(fp32);
+		return fp.u;
+	}
+
 	void Int_Vh2f(u32 op)
 	{
 		u32 s[4];
@@ -669,7 +706,7 @@ namespace MIPSInt
 		int vd = _VD;
 		int vs = _VS;
 		VectorSize sz = GetVecSize(op);
-		ReadVector((float*)&s, sz, vs);
+		ReadVector((float*)&s[0], sz, vs);
 		
 		VectorSize outsize = V_Pair;
 		switch (sz) {
@@ -698,27 +735,31 @@ namespace MIPSInt
 
 	void Int_Vf2h(u32 op)
 	{
-		_dbg_assert_msg_(CPU,0,"Trying to interpret instruction that can't be interpreted");
-		// See http://stackoverflow.com/questions/1659440/32-bit-to-16-bit-floating-point-conversion
-
-		/*
-		int s[4];
-		float d[4];
+		float s[4];
+		u32 d[4];
 		int vd = _VD;
 		int vs = _VS;
-		int imm = (op >> 16) & 0x1f;
-		float mult = 1.0f/(float)(1 << imm);
 		VectorSize sz = GetVecSize(op);
-		ReadVector((float*)&s, sz, vs);
-		ApplySwizzleS((float*)&s, sz); //TODO: and the mask to kill everything but swizzle
+		ReadVector(s, sz, vs);
 		
-		for (int i = 0; i < GetNumVectorElements(sz); i++)
-		{
-			d[i] = (float)s[i] * mult;
+		VectorSize outsize = V_Single;
+		switch (sz) {
+		case V_Pair:
+			outsize = V_Single;
+			d[0] = ShrinkToHalf(s[0]) | ((u32)ShrinkToHalf(s[1]) << 16);
+			break;
+		case V_Quad:
+			outsize = V_Pair;
+			d[0] = ShrinkToHalf(s[0]) | ((u32)ShrinkToHalf(s[1]) << 16);
+			d[1] = ShrinkToHalf(s[2]) | ((u32)ShrinkToHalf(s[3]) << 16);
+			break;
+		case V_Single:
+		case V_Triple:
+			_dbg_assert_msg_(CPU, 0, "Trying to interpret Int_Vf2h instruction that can't be interpreted");
+			break;
 		}
-		ApplyPrefixD(d, sz); //TODO: and the mask to kill everything but mask
-		WriteVector(d, sz, vd);
-		*/
+		ApplyPrefixD((float*)&d[0], outsize); //TODO: and the mask to kill everything but mask
+		WriteVector((float*)&d[0], outsize, vd);
 		PC += 4;
 		EatPrefixes();
 	}

From 6c6bd0bd9cfccf4fc4bf90a2a8faf0afcd49661f Mon Sep 17 00:00:00 2001
From: "Unknown W. Brackets" <checkins@unknownbrackets.org>
Date: Sat, 23 Feb 2013 11:34:32 -0800
Subject: [PATCH 2/2] Correct prefix handling in vf2h/vh2f.

---
 Core/MIPS/MIPSIntVFPU.cpp | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/Core/MIPS/MIPSIntVFPU.cpp b/Core/MIPS/MIPSIntVFPU.cpp
index 574f2a2fe..4cc701ece 100644
--- a/Core/MIPS/MIPSIntVFPU.cpp
+++ b/Core/MIPS/MIPSIntVFPU.cpp
@@ -707,6 +707,7 @@ namespace MIPSInt
 		int vs = _VS;
 		VectorSize sz = GetVecSize(op);
 		ReadVector((float*)&s[0], sz, vs);
+		ApplySwizzleS((float*)&s[0], sz);
 		
 		VectorSize outsize = V_Pair;
 		switch (sz) {
@@ -727,7 +728,7 @@ namespace MIPSInt
 			_dbg_assert_msg_(CPU, 0, "Trying to interpret Int_Vh2f instruction that can't be interpreted");
 			break;
 		}
-		ApplyPrefixD(d, outsize); //TODO: and the mask to kill everything but mask
+		ApplyPrefixD(d, outsize);
 		WriteVector(d, outsize, vd);
 		PC += 4;
 		EatPrefixes();
@@ -741,6 +742,7 @@ namespace MIPSInt
 		int vs = _VS;
 		VectorSize sz = GetVecSize(op);
 		ReadVector(s, sz, vs);
+		ApplySwizzleS(s, sz);
 		
 		VectorSize outsize = V_Single;
 		switch (sz) {
@@ -758,7 +760,7 @@ namespace MIPSInt
 			_dbg_assert_msg_(CPU, 0, "Trying to interpret Int_Vf2h instruction that can't be interpreted");
 			break;
 		}
-		ApplyPrefixD((float*)&d[0], outsize); //TODO: and the mask to kill everything but mask
+		ApplyPrefixD((float*)&d[0], outsize);
 		WriteVector((float*)&d[0], outsize, vd);
 		PC += 4;
 		EatPrefixes();