diff --git a/Core/MIPS/x86/X64IRCompFPU.cpp b/Core/MIPS/x86/X64IRCompFPU.cpp
index 618e844b3d..ee43326832 100644
--- a/Core/MIPS/x86/X64IRCompFPU.cpp
+++ b/Core/MIPS/x86/X64IRCompFPU.cpp
@@ -58,6 +58,24 @@ void X64JitBackend::EmitFPUConstants() {
 	}
 }
 
+void X64JitBackend::CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane) {
+	// TODO: Move to regcache or emitter maybe?
+	if (lane == 0) {
+		if (dest != src)
+			MOVAPS(dest, R(src));
+	} else if (lane == 1 && cpu_info.bSSE3) {
+		MOVSHDUP(dest, R(src));
+	} else if (lane == 2) {
+		MOVHLPS(dest, src);
+	} else if (cpu_info.bAVX) {
+		VPERMILPS(128, dest, R(src), VFPU_SWIZZLE(lane, lane, lane, lane));
+	} else {
+		if (dest != src)
+			MOVAPS(dest, R(src));
+		SHUFPS(dest, R(dest), VFPU_SWIZZLE(lane, lane, lane, lane));
+	}
+}
+
 void X64JitBackend::CompIR_FArith(IRInst inst) {
 	CONDITIONAL_DISABLE;
 
@@ -174,7 +192,15 @@ void X64JitBackend::CompIR_FAssign(IRInst inst) {
 
 	switch (inst.op) {
 	case IROp::FMov:
-		if (inst.dest != inst.src1) {
+		// Just to make sure we don't generate bad code.
+		if (inst.dest == inst.src1)
+			break;
+		if (regs_.IsFPRMapped(inst.src1 & ~3) && regs_.GetFPRLaneCount(inst.src1 & ~3) == 4 && (inst.dest & ~3) != (inst.src1 & ~3)) {
+			// Okay, this is an extract. Avoid unvec4ing src1.
+			regs_.SpillLockFPR(inst.src1);
+			regs_.MapFPR(inst.dest, MIPSMap::NOINIT);
+			CopyVec4ToFPRLane0(regs_.FX(inst.dest), regs_.FX(inst.src1 & ~3), inst.src1 & 3);
+		} else {
 			regs_.Map(inst);
 			MOVAPS(regs_.FX(inst.dest), regs_.F(inst.src1));
 		}
@@ -688,31 +714,13 @@ static uint32_t x64_asin(uint32_t v) {
 void X64JitBackend::CompIR_FSpecial(IRInst inst) {
 	CONDITIONAL_DISABLE;
 
-	// TODO: Regcache... maybe emitter helper too?
-	auto laneToReg0 = [&](X64Reg dest, X64Reg src, int lane) {
-		if (lane == 0) {
-			if (dest != src)
-				MOVAPS(dest, R(src));
-		} else if (lane == 1 && cpu_info.bSSE3) {
-			MOVSHDUP(dest, R(src));
-		} else if (lane == 2) {
-			MOVHLPS(dest, src);
-		} else if (cpu_info.bAVX) {
-			VPERMILPS(128, dest, R(src), VFPU_SWIZZLE(lane, lane, lane, lane));
-		} else {
-			if (dest != src)
-				MOVAPS(dest, R(src));
-			SHUFPS(dest, R(dest), VFPU_SWIZZLE(lane, lane, lane, lane));
-		}
-	};
-
 	auto callFuncF_F = [&](const void *func) {
 		regs_.FlushBeforeCall();
 
 #if X64JIT_USE_XMM_CALL
 		if (regs_.IsFPRMapped(inst.src1)) {
 			int lane = regs_.GetFPRLane(inst.src1);
-			laneToReg0(XMM0, regs_.FX(inst.src1), lane);
+			CopyVec4ToFPRLane0(XMM0, regs_.FX(inst.src1), lane);
 		} else {
 			// Account for CTXREG being increased by 128 to reduce imm sizes.
 			int offset = offsetof(MIPSState, f) + inst.src1 * 4 - 128;
@@ -728,7 +736,7 @@ void X64JitBackend::CompIR_FSpecial(IRInst inst) {
 			if (lane == 0) {
 				MOVD_xmm(R(SCRATCH1), regs_.FX(inst.src1));
 			} else {
-				laneToReg0(XMM0, regs_.FX(inst.src1), lane);
+				CopyVec4ToFPRLane0(XMM0, regs_.FX(inst.src1), lane);
 				MOVD_xmm(R(SCRATCH1), XMM0);
 			}
 		} else {
diff --git a/Core/MIPS/x86/X64IRJit.h b/Core/MIPS/x86/X64IRJit.h
index a30268f554..cb892d80d1 100644
--- a/Core/MIPS/x86/X64IRJit.h
+++ b/Core/MIPS/x86/X64IRJit.h
@@ -124,6 +124,7 @@ private:
 	void EmitVecConstants();
 
 	Gen::OpArg PrepareSrc1Address(IRInst inst);
+	void CopyVec4ToFPRLane0(Gen::X64Reg dest, Gen::X64Reg src, int lane);
 
 	JitOptions &jo;
 	X64IRRegCache regs_;
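
A note on the lane semantics the new helper relies on: CopyVec4ToFPRLane0 only promises that lane `lane` of src ends up in lane 0 of dest; the remaining lanes are don't-cares for the caller. MOVSHDUP (lane 1) and MOVHLPS (lane 2) are presumably preferred over a general shuffle because they need no immediate byte, with VPERMILPS/SHUFPS broadcasting the lane as the fallback. The following standalone C++ model is a hypothetical illustration of what each instruction produces, not PPSSPP code:

    // Scalar model of the XMM lane moves emitted by CopyVec4ToFPRLane0.
    // Hypothetical sketch only; names and values are invented.
    #include <cstdio>

    struct Xmm { float f[4]; };  // the four 32-bit lanes of an XMM register

    // MOVSHDUP dst, src duplicates the odd lanes: {s1, s1, s3, s3}.
    Xmm movshdup(Xmm s) { return {{s.f[1], s.f[1], s.f[3], s.f[3]}}; }

    // MOVHLPS dst, src copies src's high quadword into dst's low one:
    // {s2, s3, d2, d3}.
    Xmm movhlps(Xmm d, Xmm s) { return {{s.f[2], s.f[3], d.f[2], d.f[3]}}; }

    // SHUFPS dst, dst, imm (or VPERMILPS) with all four 2-bit selectors
    // equal to `lane` broadcasts that lane to every position.
    Xmm broadcast(Xmm s, int lane) {
        return {{s.f[lane], s.f[lane], s.f[lane], s.f[lane]}};
    }

    int main() {
        Xmm src = {{10.0f, 11.0f, 12.0f, 13.0f}};
        printf("lane 1 via MOVSHDUP: %.0f\n", movshdup(src).f[0]);      // 11
        printf("lane 2 via MOVHLPS:  %.0f\n", movhlps(src, src).f[0]);  // 12
        printf("lane 3 via SHUFPS:   %.0f\n", broadcast(src, 3).f[0]);  // 13
        return 0;
    }

The dest != src checks in the lane-0 and SHUFPS paths exist because SHUFPS shuffles in place, so the source must first be copied into dest unless the two already coincide; VPERMILPS takes a separate source operand and skips that copy.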
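
The FMov fast path depends on vec4s occupying four consecutive, 4-aligned IR FPR indices, so reg & ~3 names the quad's base and reg & 3 the lane within it; the "extract" case is exactly a source quad different from dest's. A tiny worked example of that arithmetic, with invented values:

    // Hypothetical illustration of the index arithmetic in the FMov
    // extract path; not PPSSPP code.
    #include <cassert>

    int main() {
        int src1 = 14;          // an IR FPR index inside a mapped vec4
        int base = src1 & ~3;   // 12: first index of the containing quad
        int lane = src1 & 3;    // 2: src1's lane within that quad
        assert(base + lane == src1);

        int dest = 5;           // lives in quad 4..7, so a cross-quad move...
        assert((dest & ~3) != (src1 & ~3));  // ...i.e. the "extract" case
        return 0;
    }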