diff --git a/Core/MIPS/x86/CompVFPU.cpp b/Core/MIPS/x86/CompVFPU.cpp
index 6f29ac165..8c60df33d 100644
--- a/Core/MIPS/x86/CompVFPU.cpp
+++ b/Core/MIPS/x86/CompVFPU.cpp
@@ -17,6 +17,7 @@
 
 #include <cmath>
 #include <limits>
+#include <xmmintrin.h>
 #include "base/logging.h"
 #include "math/math_util.h"
 
@@ -1083,8 +1084,104 @@ void Jit::Comp_Vi2f(MIPSOpcode op) {
 	fpr.ReleaseSpillLocks();
 }
 
+// Planning for true SIMD
+
+// Sequence for gathering sparse registers into one SIMD:
+// MOVSS(XMM0, fpr.R(sregs[0]));
+// MOVSS(XMM1, fpr.R(sregs[1]));
+// MOVSS(XMM2, fpr.R(sregs[2]));
+// MOVSS(XMM3, fpr.R(sregs[3]));
+// SHUFPS(XMM0, R(XMM1), _MM_SHUFFLE(0, 0, 0, 0));   // XMM0 = S1 S1 S0 S0
+// SHUFPS(XMM2, R(XMM3), _MM_SHUFFLE(0, 0, 0, 0));   // XMM2 = S3 S3 S2 S2
+// SHUFPS(XMM0, R(XMM2), _MM_SHUFFLE(2, 0, 2, 0));   // XMM0 = S3 S2 S1 S0
+// Some punpckwd etc would also work.
+
+// Sequence for scattering a SIMD register to sparse registers:
+// (Very serial though, better methods may be possible)
+// MOVSS(fpr.R(sregs[0]), XMM0);
+// SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
+// MOVSS(fpr.R(sregs[1]), XMM0);
+// SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
+// MOVSS(fpr.R(sregs[2]), XMM0);
+// SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
+// MOVSS(fpr.R(sregs[3]), XMM0);
+
+
+// Translation of ryg's half_to_float5_SSE2
 void Jit::Comp_Vh2f(MIPSOpcode op) {
-	DISABLE;
+#define SSE_CONST4(name, val) static const u32 MEMORY_ALIGNED16(name[4]) = { (val), (val), (val), (val) }
+
+	SSE_CONST4(mask_nosign,         0x7fff);
+	SSE_CONST4(magic,               (254 - 15) << 23);
+	SSE_CONST4(was_infnan,          0x7bff);
+	SSE_CONST4(exp_infnan,          255 << 23);
+	
+#undef SSE_CONST4
+
+	CONDITIONAL_DISABLE;
+	if (js.HasUnknownPrefix())
+		DISABLE;
+
+	VectorSize sz = GetVecSize(op);
+	VectorSize outsize;
+	switch (sz) {
+	case V_Single:
+		outsize = V_Pair;
+		DISABLE;
+		break;
+	case V_Pair:
+		outsize = V_Quad;
+		break;
+	}
+
+	u8 sregs[4], dregs[4];
+	GetVectorRegsPrefixS(sregs, sz, _VS);
+	GetVectorRegsPrefixD(dregs, outsize, _VD);
+
+	// Force ourselves an extra xreg as temp space.
+	X64Reg tempR = fpr.GetFreeXReg();
+	
+	MOVSS(XMM0, fpr.V(sregs[0]));
+	if (sz != V_Single) {
+		MOVSS(XMM1, fpr.V(sregs[1]));
+		PUNPCKLDQ(XMM0, R(XMM1));
+	}
+	XORPS(XMM1, R(XMM1));
+	PUNPCKLWD(XMM0, R(XMM1));
+
+	// OK, 16 bits in each word.
+	// Let's go. Deep magic here.
+	MOVAPS(XMM1, R(XMM0));
+	// MOVAPS(XMM2, R(XMM0));  // xmm2 = h
+	ANDPS(XMM0, M((void *)mask_nosign)); // xmm0 = expmant
+	XORPS(XMM1, R(XMM0));  // xmm1 = justsign = expmant ^ xmm0
+	MOVAPS(tempR, R(XMM0));
+	PCMPGTD(tempR, M((void *)was_infnan));  // xmm2 = b_wasinfnan
+	PSLLD(XMM0, 13);
+	MULPS(XMM0, M((void *)magic));  /// xmm0 = scaled
+	PSLLD(XMM1, 16);  // xmm1 = sign
+	ANDPS(tempR, M((void *)exp_infnan));
+	ORPS(XMM1, R(tempR));
+	ORPS(XMM0, R(XMM1));
+
+	fpr.MapRegsV(dregs, outsize, MAP_NOINIT | MAP_DIRTY);  
+
+	// TODO: Could apply D-prefix in parallel here...
+
+	MOVSS(fpr.V(dregs[0]), XMM0);
+	SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
+	MOVSS(fpr.V(dregs[1]), XMM0);
+
+	if (sz != V_Single) {
+		SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
+		MOVSS(fpr.V(dregs[2]), XMM0);
+		SHUFPS(XMM0, R(XMM0), _MM_SHUFFLE(3, 3, 2, 1));
+		MOVSS(fpr.V(dregs[3]), XMM0);
+	}
+
+	ApplyPrefixD(dregs, outsize);
+	gpr.UnlockAllX();
+	fpr.ReleaseSpillLocks();
 }
 
 extern const double mulTableVf2i[32] = {
@@ -1759,22 +1856,17 @@ void Jit::Comp_Vfim(MIPSOpcode op) {
 static float sincostemp[2];
 
 void SinCos(float angle) {
-#ifndef M_PI_2
-#define M_PI_2     1.57079632679489661923
-#endif
-	angle *= (float)M_PI_2;
+	angle *= (float)1.57079632679489661923;  // pi / 2
 	sincostemp[0] = sinf(angle);
 	sincostemp[1] = cosf(angle);
 }
 
 void SinCosNegSin(float angle) {
-#ifndef M_PI_2
-#define M_PI_2     1.57079632679489661923
-#endif
-	angle *= (float)M_PI_2;
+	angle *= (float)1.57079632679489661923;  // pi / 2
 	sincostemp[0] = -sinf(angle);
 	sincostemp[1] = cosf(angle);
 }
+
 // Very heavily used by FF:CC
 void Jit::Comp_VRot(MIPSOpcode op) {
 	// DISABLE;
diff --git a/Core/MIPS/x86/Jit.cpp b/Core/MIPS/x86/Jit.cpp
index 3d0f6bc36..7322ccd4e 100644
--- a/Core/MIPS/x86/Jit.cpp
+++ b/Core/MIPS/x86/Jit.cpp
@@ -18,6 +18,8 @@
 #include <algorithm>
 #include <iterator>
 
+#include "math/math_util.h"
+
 #include "Common/ChunkFile.h"
 #include "Core/Core.h"
 #include "Core/System.h"
@@ -116,11 +118,13 @@ Jit::Jit(MIPSState *mips) : blocks(mips, this), mips_(mips)
 	fpr.SetEmitter(this);
 	AllocCodeSpace(1024 * 1024 * 16);
 	asm_.Init(mips, this);
-
 	// TODO: If it becomes possible to switch from the interpreter, this should be set right.
 	js.startDefaultPrefix = true;
 }
 
+Jit::~Jit() {
+}
+
 void Jit::DoState(PointerWrap &p)
 {
 	auto s = p.Section("Jit", 1);
diff --git a/Core/MIPS/x86/Jit.h b/Core/MIPS/x86/Jit.h
index 9c8cfdbe6..b8b49ee48 100644
--- a/Core/MIPS/x86/Jit.h
+++ b/Core/MIPS/x86/Jit.h
@@ -170,6 +170,7 @@ class Jit : public Gen::XCodeBlock
 {
 public:
 	Jit(MIPSState *mips);
+	~Jit();
 	void DoState(PointerWrap &p);
 	static void DoDummyState(PointerWrap &p);
 
@@ -339,8 +340,7 @@ private:
 
 	MIPSState *mips_;
 
-	class JitSafeMem
-	{
+	class JitSafeMem {
 	public:
 		JitSafeMem(Jit *jit, MIPSGPReg raddr, s32 offset, u32 alignMask = 0xFFFFFFFF);
 
@@ -367,8 +367,7 @@ private:
 		void NextSlowRead(void *safeFunc, int suboffset);
 
 	private:
-		enum ReadType
-		{
+		enum ReadType {
 			MEM_READ,
 			MEM_WRITE,
 		};
diff --git a/Core/MIPS/x86/RegCacheFPU.h b/Core/MIPS/x86/RegCacheFPU.h
index cd8b669f2..e8f49568c 100644
--- a/Core/MIPS/x86/RegCacheFPU.h
+++ b/Core/MIPS/x86/RegCacheFPU.h
@@ -139,9 +139,9 @@ public:
 
 	MIPSState *mips;
 
-private:
-	X64Reg GetFreeXReg();
 	void FlushX(X64Reg reg);
+	X64Reg GetFreeXReg();
+private:
 	const int *GetAllocationOrder(int &count);
 
 	MIPSCachedFPReg regs[NUM_MIPS_FPRS];