diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp
index b1851f82c..9d3f5b135 100644
--- a/Core/MIPS/ARM/ArmCompVFPU.cpp
+++ b/Core/MIPS/ARM/ArmCompVFPU.cpp
@@ -29,6 +29,8 @@
 #include "Core/MIPS/ARM/ArmJit.h"
 #include "Core/MIPS/ARM/ArmRegCache.h"
 
+// Cool NEON references:
+// http://www.delmarnorth.com/microwave/requirements/neon-test-tutorial.pdf
 
 const bool disablePrefixes = false;
 
diff --git a/Core/MIPS/x86/RegCacheFPU.h b/Core/MIPS/x86/RegCacheFPU.h
index e8f49568c..5f7340f25 100644
--- a/Core/MIPS/x86/RegCacheFPU.h
+++ b/Core/MIPS/x86/RegCacheFPU.h
@@ -32,6 +32,18 @@ using namespace Gen;
 // Temp regs: 4 from S prefix, 4 from T prefix, 4 from D mask, and 4 for work (worst case.)
 // But most of the time prefixes aren't used that heavily so we won't use all of them.
 
+// PLANS FOR PROPER SIMD
+// 1-, 2-, 3-, and 4-vectors will each be loaded into a single XMM register.
+// Matrices will be loaded into pairs, triads, or quads of XMM registers, simply by loading
+// the columns or the rows one by one.
+
+// On x86 this means that only one 4x4 matrix can be fully loaded at once, but that's alright.
+// We might want to keep "linearized" columns in memory.
+
+// Implement optimized vec/matrix multiplications (of all types) and transposes that
+// take into account which XMM registers the values are in. Fallback: just dump out the values
+// and do it the old way.
+
 enum {
 	NUM_TEMPS = 16,
 	TEMP0 = 32 + 128,
diff --git a/lang b/lang
index 6d8cc479c..fa980bf1e 160000
--- a/lang
+++ b/lang
@@ -1 +1 @@
-Subproject commit 6d8cc479c8be8ba20bdab5595e5c041e7db0cdf7
+Subproject commit fa980bf1e567687c3649e4b2b461d1a223f800cc
diff --git a/native b/native
index e409b65e8..5bbab2794 160000
--- a/native
+++ b/native
@@ -1 +1 @@
-Subproject commit e409b65e85b6f1b0cc06f8b5db220aca0505ed5d
+Subproject commit 5bbab2794a009061a3b5bbf6b69b3ef32d34c152