diff --git a/Core/MIPS/ARM/ArmCompVFPU.cpp b/Core/MIPS/ARM/ArmCompVFPU.cpp index b1851f82c..9d3f5b135 100644 --- a/Core/MIPS/ARM/ArmCompVFPU.cpp +++ b/Core/MIPS/ARM/ArmCompVFPU.cpp @@ -29,6 +29,8 @@ #include "Core/MIPS/ARM/ArmJit.h" #include "Core/MIPS/ARM/ArmRegCache.h" +// Cool NEON references: +// http://www.delmarnorth.com/microwave/requirements/neon-test-tutorial.pdf const bool disablePrefixes = false; diff --git a/Core/MIPS/x86/RegCacheFPU.h b/Core/MIPS/x86/RegCacheFPU.h index e8f49568c..5f7340f25 100644 --- a/Core/MIPS/x86/RegCacheFPU.h +++ b/Core/MIPS/x86/RegCacheFPU.h @@ -32,6 +32,18 @@ using namespace Gen; // Temp regs: 4 from S prefix, 4 from T prefix, 4 from D mask, and 4 for work (worst case.) // But most of the time prefixes aren't used that heavily so we won't use all of them. +// PLANS FOR PROPER SIMD +// 1-, 2-, 3-, and 4-vectors will each be loaded into a single XMM register. +// Matrices will be loaded into pairs, triads, or quads of XMM registers - simply by loading +// the columns or the rows one by one. + +// On x86 this means that only one 4x4 matrix can be fully loaded at once, but that's alright. +// We might want to keep "linearized" columns in memory. + +// Implement optimized vec/matrix multiplications of all types and transposes that +// take into account which XMM registers the values are in. Fallback: Just dump out the values +// and do it the old way. + enum { NUM_TEMPS = 16, TEMP0 = 32 + 128, diff --git a/lang b/lang index 6d8cc479c..fa980bf1e 160000 --- a/lang +++ b/lang @@ -1 +1 @@ -Subproject commit 6d8cc479c8be8ba20bdab5595e5c041e7db0cdf7 +Subproject commit fa980bf1e567687c3649e4b2b461d1a223f800cc diff --git a/native b/native index e409b65e8..5bbab2794 160000 --- a/native +++ b/native @@ -1 +1 @@ -Subproject commit e409b65e85b6f1b0cc06f8b5db220aca0505ed5d +Subproject commit 5bbab2794a009061a3b5bbf6b69b3ef32d34c152