commit 7d085468536a58c2ea1959e6e76939aae1f6944b
Author: gabest <gabest@627e133f-a60d-0410-aeef-a778af760ea1>
Date:   Sat Dec 8 04:50:48 2007 +0000

diff --git a/GSdx_vs2005.sln b/GSdx_vs2005.sln
new file mode 100644
index 0000000..4fef422
--- /dev/null
+++ b/GSdx_vs2005.sln
@@ -0,0 +1,59 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 9.00
+# Visual Studio 2005
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx", "gsdx\GSdx_vs2005.vcproj", "{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx10", "gsdx10\GSdx10_vs2005.vcproj", "{345C9F24-0B9A-4289-B375-ADD3B63461B7}"
+	ProjectSection(ProjectDependencies) = postProject
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95} = {18E42F6F-3A62-41EE-B42F-79366C4F1E95}
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug SSE2|Win32 = Debug SSE2|Win32
+		Debug SSE2|x64 = Debug SSE2|x64
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release SSE2|Win32 = Release SSE2|Win32
+		Release SSE2|x64 = Release SSE2|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.Build.0 = Debug SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.Build.0 = Debug|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.Build.0 = Release SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.Build.0 = Release|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.Build.0 = Debug SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.ActiveCfg = Debug|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.Build.0 = Debug|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.ActiveCfg = Debug|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.Build.0 = Debug|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.Build.0 = Release SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.ActiveCfg = Release|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.Build.0 = Release|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.ActiveCfg = Release|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/GSdx_vs2008.sln b/GSdx_vs2008.sln
new file mode 100644
index 0000000..c193102
--- /dev/null
+++ b/GSdx_vs2008.sln
@@ -0,0 +1,59 @@
+﻿
+Microsoft Visual Studio Solution File, Format Version 10.00
+# Visual Studio 2008
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx", "gsdx\GSdx_vs2008.vcproj", "{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
+EndProject
+Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "GSdx10", "gsdx10\GSdx10_vs2008.vcproj", "{345C9F24-0B9A-4289-B375-ADD3B63461B7}"
+	ProjectSection(ProjectDependencies) = postProject
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95} = {18E42F6F-3A62-41EE-B42F-79366C4F1E95}
+	EndProjectSection
+EndProject
+Global
+	GlobalSection(SolutionConfigurationPlatforms) = preSolution
+		Debug SSE2|Win32 = Debug SSE2|Win32
+		Debug SSE2|x64 = Debug SSE2|x64
+		Debug|Win32 = Debug|Win32
+		Debug|x64 = Debug|x64
+		Release SSE2|Win32 = Release SSE2|Win32
+		Release SSE2|x64 = Release SSE2|x64
+		Release|Win32 = Release|Win32
+		Release|x64 = Release|x64
+	EndGlobalSection
+	GlobalSection(ProjectConfigurationPlatforms) = postSolution
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug SSE2|x64.Build.0 = Debug SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.ActiveCfg = Debug|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|Win32.Build.0 = Debug|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.ActiveCfg = Debug|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Debug|x64.Build.0 = Debug|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release SSE2|x64.Build.0 = Release SSE2|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.ActiveCfg = Release|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|Win32.Build.0 = Release|Win32
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.ActiveCfg = Release|x64
+		{18E42F6F-3A62-41EE-B42F-79366C4F1E95}.Release|x64.Build.0 = Release|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.ActiveCfg = Debug SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|Win32.Build.0 = Debug SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.ActiveCfg = Debug SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug SSE2|x64.Build.0 = Debug SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.ActiveCfg = Debug|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|Win32.Build.0 = Debug|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.ActiveCfg = Debug|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Debug|x64.Build.0 = Debug|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.ActiveCfg = Release SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|Win32.Build.0 = Release SSE2|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.ActiveCfg = Release SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release SSE2|x64.Build.0 = Release SSE2|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.ActiveCfg = Release|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|Win32.Build.0 = Release|Win32
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.ActiveCfg = Release|x64
+		{345C9F24-0B9A-4289-B375-ADD3B63461B7}.Release|x64.Build.0 = Release|x64
+	EndGlobalSection
+	GlobalSection(SolutionProperties) = preSolution
+		HideSolutionNode = FALSE
+	EndGlobalSection
+EndGlobal
diff --git a/common.vsprops b/common.vsprops
new file mode 100644
index 0000000..8b4a228
--- /dev/null
+++ b/common.vsprops
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioPropertySheet
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="common"
+	OutputDirectory="$(PlatformName)\$(ConfigurationName)"
+	IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+	>
+	<Tool
+		Name="VCCLCompilerTool"
+		EnableIntrinsicFunctions="true"
+		PreprocessorDefinitions="WIN32;_WINDOWS"
+		FloatingPointModel="2"
+		RuntimeTypeInfo="false"
+		WarningLevel="4"
+		DebugInformationFormat="3"
+		DisableSpecificWarnings="4995;4324"
+	/>
+	<Tool
+		Name="VCLinkerTool"
+		GenerateDebugInformation="true"
+		SubSystem="2"
+		RandomizedBaseAddress="1"
+	/>
+</VisualStudioPropertySheet>
diff --git a/debug.vsprops b/debug.vsprops
new file mode 100644
index 0000000..9d5c60b
--- /dev/null
+++ b/debug.vsprops
@@ -0,0 +1,19 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioPropertySheet
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="debug"
+	>
+	<Tool
+		Name="VCCLCompilerTool"
+		Optimization="0"
+		PreprocessorDefinitions="_DEBUG"
+		MinimalRebuild="true"
+		BasicRuntimeChecks="1"
+		RuntimeLibrary="1"
+	/>
+	<Tool
+		Name="VCLinkerTool"
+		LinkIncremental="2"
+	/>
+</VisualStudioPropertySheet>
diff --git a/gsdx/GS.h b/gsdx/GS.h
new file mode 100644
index 0000000..5e2dab3
--- /dev/null
+++ b/gsdx/GS.h
@@ -0,0 +1,958 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ *	Special Notes: 
+ *
+ *	Register definitions and most of the enums originate from sps2dev-0.4.0
+ *	Copyright (C) 2002 Terratron Technologies Inc.  All Rights Reserved.
+ *
+ */
+
+#pragma once
+
+// 
+
+#pragma pack(push, 1)
+
+//
+// sps2registers.h
+//
+
+enum GS_REG
+{
+	GS_PMODE	= 0x12000000,
+	GS_SMODE1	= 0x12000010,
+	GS_SMODE2	= 0x12000020,
+	GS_SRFSH	= 0x12000030,
+	GS_SYNCH1	= 0x12000040,
+	GS_SYNCH2	= 0x12000050,
+	GS_SYNCV	= 0x12000060,
+	GS_DISPFB1	= 0x12000070,
+	GS_DISPLAY1	= 0x12000080,
+	GS_DISPFB2	= 0x12000090,
+	GS_DISPLAY2	= 0x120000a0,
+	GS_EXTBUF	= 0x120000b0,
+	GS_EXTDATA	= 0x120000c0,
+	GS_EXTWRITE	= 0x120000d0,
+	GS_BGCOLOR	= 0x120000e0,
+	GS_UNKNOWN	= 0x12000400,
+	GS_CSR		= 0x12001000,
+	GS_IMR		= 0x12001010,
+	GS_BUSDIR	= 0x12001040,
+	GS_SIGLBLID	= 0x12001080
+};
+
+enum GS_PRIM
+{
+	GS_POINTLIST		= 0,
+	GS_LINELIST			= 1,
+	GS_LINESTRIP		= 2,
+	GS_TRIANGLELIST		= 3,
+	GS_TRIANGLESTRIP	= 4,
+	GS_TRIANGLEFAN		= 5,
+	GS_SPRITE			= 6,
+	GS_INVALID			= 7,
+};
+
+enum GIF_REG
+{
+	GIF_REG_PRIM	= 0x00,
+	GIF_REG_RGBA	= 0x01,
+	GIF_REG_STQ		= 0x02,
+	GIF_REG_UV		= 0x03,
+	GIF_REG_XYZF2	= 0x04,
+	GIF_REG_XYZ2	= 0x05,
+	GIF_REG_TEX0_1	= 0x06,
+	GIF_REG_TEX0_2	= 0x07,
+	GIF_REG_CLAMP_1	= 0x08,
+	GIF_REG_CLAMP_2	= 0x09,
+	GIF_REG_FOG		= 0x0a,
+	GIF_REG_XYZF3	= 0x0c,
+	GIF_REG_XYZ3	= 0x0d,
+	GIF_REG_A_D		= 0x0e,
+	GIF_REG_NOP		= 0x0f,
+};
+
+enum GIF_A_D_REG
+{
+	GIF_A_D_REG_PRIM		= 0x00,
+	GIF_A_D_REG_RGBAQ		= 0x01,
+	GIF_A_D_REG_ST			= 0x02,
+	GIF_A_D_REG_UV			= 0x03,
+	GIF_A_D_REG_XYZF2		= 0x04,
+	GIF_A_D_REG_XYZ2		= 0x05,
+	GIF_A_D_REG_TEX0_1		= 0x06,
+	GIF_A_D_REG_TEX0_2		= 0x07,
+	GIF_A_D_REG_CLAMP_1		= 0x08,
+	GIF_A_D_REG_CLAMP_2		= 0x09,
+	GIF_A_D_REG_FOG			= 0x0a,
+	GIF_A_D_REG_XYZF3		= 0x0c,
+	GIF_A_D_REG_XYZ3		= 0x0d,
+	GIF_A_D_REG_NOP			= 0x0f,
+	GIF_A_D_REG_TEX1_1		= 0x14,
+	GIF_A_D_REG_TEX1_2		= 0x15,
+	GIF_A_D_REG_TEX2_1		= 0x16,
+	GIF_A_D_REG_TEX2_2		= 0x17,
+	GIF_A_D_REG_XYOFFSET_1	= 0x18,
+	GIF_A_D_REG_XYOFFSET_2	= 0x19,
+	GIF_A_D_REG_PRMODECONT	= 0x1a,
+	GIF_A_D_REG_PRMODE		= 0x1b,
+	GIF_A_D_REG_TEXCLUT		= 0x1c,
+	GIF_A_D_REG_SCANMSK		= 0x22,
+	GIF_A_D_REG_MIPTBP1_1	= 0x34,
+	GIF_A_D_REG_MIPTBP1_2	= 0x35,
+	GIF_A_D_REG_MIPTBP2_1	= 0x36,
+	GIF_A_D_REG_MIPTBP2_2	= 0x37,
+	GIF_A_D_REG_TEXA		= 0x3b,
+	GIF_A_D_REG_FOGCOL		= 0x3d,
+	GIF_A_D_REG_TEXFLUSH	= 0x3f,
+	GIF_A_D_REG_SCISSOR_1	= 0x40,
+	GIF_A_D_REG_SCISSOR_2	= 0x41,
+	GIF_A_D_REG_ALPHA_1		= 0x42,
+	GIF_A_D_REG_ALPHA_2		= 0x43,
+	GIF_A_D_REG_DIMX		= 0x44,
+	GIF_A_D_REG_DTHE		= 0x45,
+	GIF_A_D_REG_COLCLAMP	= 0x46,
+	GIF_A_D_REG_TEST_1		= 0x47,
+	GIF_A_D_REG_TEST_2		= 0x48,
+	GIF_A_D_REG_PABE		= 0x49,
+	GIF_A_D_REG_FBA_1		= 0x4a,
+	GIF_A_D_REG_FBA_2		= 0x4b,
+	GIF_A_D_REG_FRAME_1		= 0x4c,
+	GIF_A_D_REG_FRAME_2		= 0x4d,
+	GIF_A_D_REG_ZBUF_1		= 0x4e,
+	GIF_A_D_REG_ZBUF_2		= 0x4f,
+	GIF_A_D_REG_BITBLTBUF	= 0x50,
+	GIF_A_D_REG_TRXPOS		= 0x51,
+	GIF_A_D_REG_TRXREG		= 0x52,
+	GIF_A_D_REG_TRXDIR		= 0x53,
+	GIF_A_D_REG_HWREG		= 0x54,
+	GIF_A_D_REG_SIGNAL		= 0x60,
+	GIF_A_D_REG_FINISH		= 0x61,
+	GIF_A_D_REG_LABEL		= 0x62,
+};
+
+enum GIF_FLG // values stored in the 2-bit GIFTag::FLG field
+{
+	GIF_FLG_PACKED	= 0,
+	GIF_FLG_REGLIST	= 1,
+	GIF_FLG_IMAGE	= 2,
+	GIF_FLG_IMAGE2	= 3
+};
+
+enum PSM
+{
+	PSM_PSMCT32		= 0,  // 0000-0000 
+	PSM_PSMCT24		= 1,  // 0000-0001 
+	PSM_PSMCT16		= 2,  // 0000-0010 
+	PSM_PSMCT16S	= 10, // 0000-1010 
+	PSM_PSMT8		= 19, // 0001-0011 
+	PSM_PSMT4		= 20, // 0001-0100 
+	PSM_PSMT8H		= 27, // 0001-1011
+	PSM_PSMT4HL		= 36, // 0010-0100
+	PSM_PSMT4HH		= 44, // 0010-1100
+	PSM_PSMZ32		= 48, // 0011-0000
+	PSM_PSMZ24		= 49, // 0011-0001
+	PSM_PSMZ16		= 50, // 0011-0010
+	PSM_PSMZ16S		= 58, // 0011-1010
+};
+
+//
+// sps2regstructs.h
+//
+
+#define REG64(name) \
+union name			\
+{					\
+	UINT64 i64;		\
+	UINT32 ai32[2];	\
+	struct {		\
+
+#define REG128(name)\
+union name			\
+{					\
+	UINT64 ai64[2];	\
+	UINT32 ai32[4];	\
+	struct {		\
+
+#define REG64_(prefix, name) REG64(prefix##name) // REG64 whose union name is prefix and name pasted together (GSReg + CSR -> GSRegCSR)
+#define REG128_(prefix, name) REG128(prefix##name) // same token-pasting wrapper for REG128
+
+#define REG_END }; }; // closes the anonymous struct, then the union, opened by REG64/REG128
+#define REG_END2 }; // closes a single level; used twice when member functions are placed between the struct and the end of the union
+
+#define REG64_SET(name) \
+union name			\
+{					\
+	UINT64 i64;		\
+	UINT32 ai32[2];	\
+
+#define REG128_SET(name)\
+union name			\
+{					\
+	__m128i ai128;  \
+	UINT64 ai64[2];	\
+	UINT32 ai32[4];	\
+
+#define REG_SET_END };
+
+REG64_(GSReg, BGCOLOR)
+	UINT32 R:8;
+	UINT32 G:8;
+	UINT32 B:8;
+	UINT32 _PAD1:8;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GSReg, BUSDIR)
+	UINT32 DIR:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GSReg, CSR)
+	UINT32 rSIGNAL:1;
+	UINT32 rFINISH:1;
+	UINT32 rHSINT:1;
+	UINT32 rVSINT:1;
+	UINT32 rEDWINT:1;
+	UINT32 rZERO1:1;
+	UINT32 rZERO2:1;
+	UINT32 r_PAD1:1;
+	UINT32 rFLUSH:1;
+	UINT32 rRESET:1;
+	UINT32 r_PAD2:2;
+	UINT32 rNFIELD:1;
+	UINT32 rFIELD:1;
+	UINT32 rFIFO:2;
+	UINT32 rREV:8;
+	UINT32 rID:8;
+	UINT32 wSIGNAL:1;
+	UINT32 wFINISH:1;
+	UINT32 wHSINT:1;
+	UINT32 wVSINT:1;
+	UINT32 wEDWINT:1;
+	UINT32 wZERO1:1;
+	UINT32 wZERO2:1;
+	UINT32 w_PAD1:1;
+	UINT32 wFLUSH:1;
+	UINT32 wRESET:1;
+	UINT32 w_PAD2:2;
+	UINT32 wNFIELD:1;
+	UINT32 wFIELD:1;
+	UINT32 wFIFO:2;
+	UINT32 wREV:8;
+	UINT32 wID:8;
+REG_END
+
+REG64_(GSReg, DISPFB) // (-1/2)
+	UINT32 FBP:9;
+	UINT32 FBW:6;
+	UINT32 PSM:5;
+	UINT32 _PAD:12;
+	UINT32 DBX:11;
+	UINT32 DBY:11;
+	UINT32 _PAD2:10;
+REG_END2
+	UINT32 Block() {return FBP<<5;}
+REG_END2
+
+REG64_(GSReg, DISPLAY) // (-1/2)
+	UINT32 DX:12;
+	UINT32 DY:11;
+	UINT32 MAGH:4;
+	UINT32 MAGV:2;
+	UINT32 _PAD:3;
+	UINT32 DW:12;
+	UINT32 DH:11;
+	UINT32 _PAD2:9;
+REG_END
+
+REG64_(GSReg, EXTBUF)
+	UINT32 EXBP:14;
+	UINT32 EXBW:6;
+	UINT32 FBIN:2;
+	UINT32 WFFMD:1;
+	UINT32 EMODA:2;
+	UINT32 EMODC:2;
+	UINT32 _PAD1:5;
+	UINT32 WDX:11;
+	UINT32 WDY:11;
+	UINT32 _PAD2:10;
+REG_END
+
+REG64_(GSReg, EXTDATA)
+	UINT32 SX:12;
+	UINT32 SY:11;
+	UINT32 SMPH:4;
+	UINT32 SMPV:2;
+	UINT32 _PAD1:3;
+	UINT32 WW:12;
+	UINT32 WH:11;
+	UINT32 _PAD2:9;
+REG_END
+
+REG64_(GSReg, EXTWRITE)
+	UINT32 WRITE:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GSReg, IMR)
+	UINT32 _PAD1:8;
+	UINT32 SIGMSK:1;
+	UINT32 FINISHMSK:1;
+	UINT32 HSMSK:1;
+	UINT32 VSMSK:1;
+	UINT32 EDWMSK:1;
+	UINT32 _PAD2:19;
+	UINT32 _PAD3:32;
+REG_END
+
+REG64_(GSReg, PMODE)
+	UINT32 EN1:1;
+	UINT32 EN2:1;
+	UINT32 CRTMD:3;
+	UINT32 MMOD:1;
+	UINT32 AMOD:1;
+	UINT32 SLBG:1;
+	UINT32 ALP:8;
+	UINT32 _PAD:16;
+	UINT32 _PAD1:32;
+REG_END
+
+REG64_(GSReg, SIGLBLID)
+	UINT32 SIGID:32;
+	UINT32 LBLID:32;
+REG_END
+
+REG64_(GSReg, SMODE1)
+	UINT32 RC:3;
+	UINT32 LC:7;
+	UINT32 T1248:2;
+	UINT32 SLCK:1;
+	UINT32 CMOD:2;
+	UINT32 EX:1;
+	UINT32 PRST:1;
+	UINT32 SINT:1;
+	UINT32 XPCK:1;
+	UINT32 PCK2:2;
+	UINT32 SPML:4;
+	UINT32 GCONT:1;
+	UINT32 PHS:1;
+	UINT32 PVS:1;
+	UINT32 PEHS:1;
+	UINT32 PEVS:1;
+	UINT32 CLKSEL:2;
+	UINT32 NVCK:1;
+	UINT32 SLCK2:1;
+	UINT32 VCKSEL:2;
+	UINT32 VHP:1;
+	UINT32 _PAD1:27;
+REG_END
+
+REG64_(GSReg, SMODE2)
+	UINT32 INT:1;
+	UINT32 FFMD:1;
+	UINT32 DPMS:2;
+	UINT32 _PAD2:28;
+	UINT32 _PAD3:32;
+REG_END
+
+REG64_SET(GSReg)
+	GSRegBGCOLOR	BGCOLOR;
+	GSRegBUSDIR		BUSDIR;
+	GSRegCSR		CSR;
+	GSRegDISPFB		DISPFB;
+	GSRegDISPLAY	DISPLAY;
+	GSRegEXTBUF		EXTBUF;
+	GSRegEXTDATA	EXTDATA;
+	GSRegEXTWRITE	EXTWRITE;
+	GSRegIMR		IMR;
+	GSRegPMODE		PMODE;
+	GSRegSIGLBLID	SIGLBLID;
+	GSRegSMODE1		SMODE1; 
+	GSRegSMODE2		SMODE2; 
+REG_SET_END
+
+//
+// sps2tags.h
+//
+
+#define SET_GIF_REG(gifTag, iRegNo, uiValue) /* ORs the low 4 bits of uiValue into nibble iRegNo of the tag's upper 64 bits; widened to UINT64 first so shifts of 32..60 bits are well defined */ \
+	{((GIFTag*)&(gifTag))->ai64[1] |= (((UINT64)(uiValue) & 0xf) << ((iRegNo) << 2));}
+
+#ifdef _M_AMD64 // x64 build: read nibble number reg straight out of the upper 64-bit half of the tag
+#define GET_GIF_REG(tag, reg) \
+	(((tag).ai64[1] >> ((reg) << 2)) & 0xf)
+#else // other builds: select the 32-bit word holding the nibble (reg >> 3), then shift within that word ((reg & 7) * 4 bits)
+#define GET_GIF_REG(tag, reg) \
+	(((tag).ai32[2 + ((reg) >> 3)] >> (((reg) & 7) << 2)) & 0xf)
+#endif
+
+//
+// GIFTag
+
+REG128(GIFTag)
+	UINT32 NLOOP:15;
+	UINT32 EOP:1;
+	UINT32 _PAD1:16;
+	UINT32 _PAD2:14;
+	UINT32 PRE:1;
+	UINT32 PRIM:11;
+	UINT32 FLG:2; // enum GIF_FLG
+	UINT32 NREG:4;
+	UINT64 REGS:64;
+REG_END
+
+// GIFReg
+
+REG64_(GIFReg, ALPHA)
+	UINT32 A:2;
+	UINT32 B:2;
+	UINT32 C:2;
+	UINT32 D:2;
+	UINT32 _PAD1:24;
+	UINT32 FIX:8;
+	UINT32 _PAD2:24;
+REG_END
+
+REG64_(GIFReg, BITBLTBUF)
+	UINT32 SBP:14;
+	UINT32 _PAD1:2;
+	UINT32 SBW:6;
+	UINT32 _PAD2:2;
+	UINT32 SPSM:6;
+	UINT32 _PAD3:2;
+	UINT32 DBP:14;
+	UINT32 _PAD4:2;
+	UINT32 DBW:6;
+	UINT32 _PAD5:2;
+	UINT32 DPSM:6;
+	UINT32 _PAD6:2;
+REG_END
+
+REG64_(GIFReg, CLAMP)
+	UINT64 WMS:2;
+	UINT64 WMT:2;
+	UINT64 MINU:10;
+	UINT64 MAXU:10;
+	UINT64 MINV:10;
+	UINT64 MAXV:10;
+	UINT64 _PAD:20;
+REG_END
+
+REG64_(GIFReg, COLCLAMP)
+	UINT32 CLAMP:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, DIMX)
+	UINT32 DM00:3;
+	UINT32 _PAD00:1;
+	UINT32 DM01:3;
+	UINT32 _PAD01:1;
+	UINT32 DM02:3;
+	UINT32 _PAD02:1;
+	UINT32 DM03:3;
+	UINT32 _PAD03:1;
+
+	UINT32 DM10:3;
+	UINT32 _PAD10:1;
+	UINT32 DM11:3;
+	UINT32 _PAD11:1;
+	UINT32 DM12:3;
+	UINT32 _PAD12:1;
+	UINT32 DM13:3;
+	UINT32 _PAD13:1;
+
+	UINT32 DM20:3;
+	UINT32 _PAD20:1;
+	UINT32 DM21:3;
+	UINT32 _PAD21:1;
+	UINT32 DM22:3;
+	UINT32 _PAD22:1;
+	UINT32 DM23:3;
+	UINT32 _PAD23:1;
+
+	UINT32 DM30:3;
+	UINT32 _PAD30:1;
+	UINT32 DM31:3;
+	UINT32 _PAD31:1;
+	UINT32 DM32:3;
+	UINT32 _PAD32:1;
+	UINT32 DM33:3;
+	UINT32 _PAD33:1;
+REG_END
+
+REG64_(GIFReg, DTHE)
+	UINT32 DTHE:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, FBA)
+	UINT32 FBA:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, FINISH)
+	UINT32 _PAD1:32;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, FOG)
+	UINT32 _PAD1:32;
+	UINT32 _PAD2:24;
+	UINT32 F:8;
+REG_END
+
+REG64_(GIFReg, FOGCOL)
+	UINT32 FCR:8;
+	UINT32 FCG:8;
+	UINT32 FCB:8;
+	UINT32 _PAD1:8;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, FRAME)
+	UINT32 FBP:9;
+	UINT32 _PAD1:7;
+	UINT32 FBW:6;
+	UINT32 _PAD2:2;
+	UINT32 PSM:6;
+	UINT32 _PAD3:2;
+	UINT32 FBMSK:32;
+REG_END2
+	UINT32 Block() {return FBP<<5;}
+REG_END2
+
+REG64_(GIFReg, HWREG)
+	UINT32 DATA_LOWER:32;
+	UINT32 DATA_UPPER:32;
+REG_END
+
+REG64_(GIFReg, LABEL)
+	UINT32 ID:32;
+	UINT32 IDMSK:32;
+REG_END
+
+REG64_(GIFReg, MIPTBP1)
+	UINT64 TBP1:14;
+	UINT64 TBW1:6;
+	UINT64 TBP2:14;
+	UINT64 TBW2:6;
+	UINT64 TBP3:14;
+	UINT64 TBW3:6;
+	UINT64 _PAD:4;
+REG_END
+
+REG64_(GIFReg, MIPTBP2)
+	UINT64 TBP4:14;
+	UINT64 TBW4:6;
+	UINT64 TBP5:14;
+	UINT64 TBW5:6;
+	UINT64 TBP6:14;
+	UINT64 TBW6:6;
+	UINT64 _PAD:4;
+REG_END
+
+REG64_(GIFReg, NOP)
+	UINT32 _PAD1:32;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, PABE)
+	UINT32 PABE:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, PRIM)
+	UINT32 PRIM:3;
+	UINT32 IIP:1;
+	UINT32 TME:1;
+	UINT32 FGE:1;
+	UINT32 ABE:1;
+	UINT32 AA1:1;
+	UINT32 FST:1;
+	UINT32 CTXT:1;
+	UINT32 FIX:1;
+	UINT32 _PAD1:21;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, PRMODE)
+	UINT32 _PRIM:3;
+	UINT32 IIP:1;
+	UINT32 TME:1;
+	UINT32 FGE:1;
+	UINT32 ABE:1;
+	UINT32 AA1:1;
+	UINT32 FST:1;
+	UINT32 CTXT:1;
+	UINT32 FIX:1;
+	UINT32 _PAD2:21;
+	UINT32 _PAD3:32;
+REG_END
+
+REG64_(GIFReg, PRMODECONT)
+	UINT32 AC:1;
+	UINT32 _PAD1:31;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, RGBAQ)
+	UINT32 R:8;
+	UINT32 G:8;
+	UINT32 B:8;
+	UINT32 A:8;
+	float Q;
+REG_END
+
+REG64_(GIFReg, SCANMSK)
+	UINT32 MSK:2;
+	UINT32 _PAD1:30;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, SCISSOR)
+	UINT32 SCAX0:11;
+	UINT32 _PAD1:5;
+	UINT32 SCAX1:11;
+	UINT32 _PAD2:5;
+	UINT32 SCAY0:11;
+	UINT32 _PAD3:5;
+	UINT32 SCAY1:11;
+	UINT32 _PAD4:5;
+REG_END
+
+REG64_(GIFReg, SIGNAL)
+	UINT32 ID:32;
+	UINT32 IDMSK:32;
+REG_END
+
+REG64_(GIFReg, ST)
+	float S;
+	float T;
+REG_END
+
+REG64_(GIFReg, TEST)
+	UINT32 ATE:1;
+	UINT32 ATST:3;
+	UINT32 AREF:8;
+	UINT32 AFAIL:2;
+	UINT32 DATE:1;
+	UINT32 DATM:1;
+	UINT32 ZTE:1;
+	UINT32 ZTST:2;
+	UINT32 _PAD1:13;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, TEX0)
+	UINT64 TBP0:14;
+	UINT64 TBW:6;
+	UINT64 PSM:6;
+	UINT64 TW:4;
+	UINT64 TH:4;
+	UINT64 TCC:1;
+	UINT64 TFX:2;
+	UINT64 CBP:14;
+	UINT64 CPSM:4;
+	UINT64 CSM:1;
+	UINT64 CSA:5;
+	UINT64 CLD:3;
+REG_END
+
+REG64_(GIFReg, TEX1)
+	UINT32 LCM:1;
+	UINT32 _PAD1:1;
+	UINT32 MXL:3;
+	UINT32 MMAG:1;
+	UINT32 MMIN:3;
+	UINT32 MTBA:1;
+	UINT32 _PAD2:9;
+	UINT32 L:2;
+	UINT32 _PAD3:11;
+	UINT32 K:12;
+	UINT32 _PAD4:20;
+REG_END
+
+REG64_(GIFReg, TEX2)
+	UINT32 _PAD1:20;
+	UINT32 PSM:6;
+	UINT32 _PAD2:6;
+	UINT32 _PAD3:5;
+	UINT32 CBP:14;
+	UINT32 CPSM:4;
+	UINT32 CSM:1;
+	UINT32 CSA:5;
+	UINT32 CLD:3;
+REG_END
+
+REG64_(GIFReg, TEXA)
+	UINT32 TA0:8;
+	UINT32 _PAD1:7;
+	UINT32 AEM:1;
+	UINT32 _PAD2:16;
+	UINT32 TA1:8;
+	UINT32 _PAD3:24;
+REG_END
+
+REG64_(GIFReg, TEXCLUT)
+	UINT32 CBW:6;
+	UINT32 COU:6;
+	UINT32 COV:10;
+	UINT32 _PAD1:10;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, TEXFLUSH)
+	UINT32 _PAD1:32;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, TRXDIR)
+	UINT32 XDIR:2;
+	UINT32 _PAD1:30;
+	UINT32 _PAD2:32;
+REG_END
+
+REG64_(GIFReg, TRXPOS)
+	UINT32 SSAX:11;
+	UINT32 _PAD1:5;
+	UINT32 SSAY:11;
+	UINT32 _PAD2:5;
+	UINT32 DSAX:11;
+	UINT32 _PAD3:5;
+	UINT32 DSAY:11;
+	UINT32 DIR:2;
+	UINT32 _PAD4:3;
+REG_END
+
+REG64_(GIFReg, TRXREG)
+	UINT32 RRW:12;
+	UINT32 _PAD1:20;
+	UINT32 RRH:12;
+	UINT32 _PAD2:20;
+REG_END
+
+REG64_(GIFReg, UV)
+	UINT32 U:14;
+	UINT32 _PAD1:2;
+	UINT32 V:14;
+	UINT32 _PAD2:2;
+	UINT32 _PAD3:32;
+REG_END
+
+REG64_(GIFReg, XYOFFSET)
+	UINT32 OFX:16;
+	UINT32 _PAD1:16;
+	UINT32 OFY:16;
+	UINT32 _PAD2:16;
+REG_END
+
+REG64_(GIFReg, XYZ)
+	UINT32 X:16;
+	UINT32 Y:16;
+	UINT32 Z:32;
+REG_END
+
+REG64_(GIFReg, XYZF)
+	UINT32 X:16;
+	UINT32 Y:16;
+	UINT32 Z:24;
+	UINT32 F:8;
+REG_END
+
+REG64_(GIFReg, ZBUF)
+	UINT32 ZBP:9;
+	UINT32 _PAD1:15;
+	// UINT32 PSM:4;
+	// UINT32 _PAD2:4;
+	UINT32 PSM:6;
+	UINT32 _PAD2:2;
+	UINT32 ZMSK:1;
+	UINT32 _PAD3:31;
+REG_END2
+	UINT32 Block() {return ZBP<<5;}
+REG_END2
+
+REG64_SET(GIFReg)
+	GIFRegALPHA			ALPHA;
+	GIFRegBITBLTBUF		BITBLTBUF;
+	GIFRegCLAMP			CLAMP;
+	GIFRegCOLCLAMP		COLCLAMP;
+	GIFRegDIMX			DIMX;
+	GIFRegDTHE			DTHE;
+	GIFRegFBA			FBA;
+	GIFRegFINISH		FINISH;
+	GIFRegFOG			FOG;
+	GIFRegFOGCOL		FOGCOL;
+	GIFRegFRAME			FRAME;
+	GIFRegHWREG			HWREG;
+	GIFRegLABEL			LABEL;
+	GIFRegMIPTBP1		MIPTBP1;
+	GIFRegMIPTBP2		MIPTBP2;
+	GIFRegNOP			NOP;
+	GIFRegPABE			PABE;
+	GIFRegPRIM			PRIM;
+	GIFRegPRMODE		PRMODE;
+	GIFRegPRMODECONT	PRMODECONT;
+	GIFRegRGBAQ			RGBAQ;
+	GIFRegSCANMSK		SCANMSK;
+	GIFRegSCISSOR		SCISSOR;
+	GIFRegSIGNAL		SIGNAL;
+	GIFRegST			ST;
+	GIFRegTEST			TEST;
+	GIFRegTEX0			TEX0;
+	GIFRegTEX1			TEX1;
+	GIFRegTEX2			TEX2;
+	GIFRegTEXA			TEXA;
+	GIFRegTEXCLUT		TEXCLUT;
+	GIFRegTEXFLUSH		TEXFLUSH;
+	GIFRegTRXDIR		TRXDIR;
+	GIFRegTRXPOS		TRXPOS;
+	GIFRegTRXREG		TRXREG;
+	GIFRegUV			UV;
+	GIFRegXYOFFSET		XYOFFSET;
+	GIFRegXYZ			XYZ;
+	GIFRegXYZF			XYZF;
+	GIFRegZBUF			ZBUF;
+REG_SET_END
+
+// GIFPacked
+
+REG128_(GIFPacked, PRIM)
+	UINT32 PRIM:11;
+	UINT32 _PAD1:21;
+	UINT32 _PAD2:32;
+	UINT32 _PAD3:32;
+	UINT32 _PAD4:32;
+REG_END
+
+REG128_(GIFPacked, RGBA)
+	UINT32 R:8;
+	UINT32 _PAD1:24;
+	UINT32 G:8;
+	UINT32 _PAD2:24;
+	UINT32 B:8;
+	UINT32 _PAD3:24;
+	UINT32 A:8;
+	UINT32 _PAD4:24;
+REG_END
+
+REG128_(GIFPacked, STQ)
+	float S;
+	float T;
+	float Q;
+	UINT32 _PAD1:32;
+REG_END
+
+REG128_(GIFPacked, UV)
+	UINT32 U:14;
+	UINT32 _PAD1:18;
+	UINT32 V:14;
+	UINT32 _PAD2:18;
+	UINT32 _PAD3:32;
+	UINT32 _PAD4:32;
+REG_END
+
+REG128_(GIFPacked, XYZF2)
+	UINT32 X:16;
+	UINT32 _PAD1:16;
+	UINT32 Y:16;
+	UINT32 _PAD2:16;
+	UINT32 _PAD3:4;
+	UINT32 Z:24;
+	UINT32 _PAD4:4;
+	UINT32 _PAD5:4;
+	UINT32 F:8;
+	UINT32 _PAD6:3;
+	UINT32 ADC:1;
+	UINT32 _PAD7:16;
+REG_END
+
+REG128_(GIFPacked, XYZ2)
+	UINT32 X:16;
+	UINT32 _PAD1:16;
+	UINT32 Y:16;
+	UINT32 _PAD2:16;
+	UINT32 Z:32;
+	UINT32 _PAD3:15;
+	UINT32 ADC:1;
+	UINT32 _PAD4:16;
+REG_END
+
+REG128_(GIFPacked, FOG)
+	UINT32 _PAD1:32;
+	UINT32 _PAD2:32;
+	UINT32 _PAD3:32;
+	UINT32 _PAD4:4;
+	UINT32 F:8;
+	UINT32 _PAD5:20;
+REG_END
+
+REG128_(GIFPacked, A_D)
+	UINT64 DATA:64;
+	UINT32 ADDR:8; // enum GIF_A_D_REG
+	UINT32 _PAD1:24;
+	UINT32 _PAD2:32;
+REG_END
+
+REG128_(GIFPacked, NOP)
+	UINT32 _PAD1:32;
+	UINT32 _PAD2:32;
+	UINT32 _PAD3:32;
+	UINT32 _PAD4:32;
+REG_END
+
+REG128_SET(GIFPackedReg)
+	GIFReg			r;
+	GIFPackedPRIM	PRIM;
+	GIFPackedRGBA	RGBA;
+	GIFPackedSTQ	STQ;
+	GIFPackedUV		UV;
+	GIFPackedXYZF2	XYZF2;
+	GIFPackedXYZ2	XYZ2;
+	GIFPackedFOG	FOG;
+	GIFPackedA_D	A_D;
+	GIFPackedNOP	NOP;
+REG_SET_END
+
+struct GIFPath // a GIF tag paired with an index into its register-descriptor nibbles
+{
+	GIFTag tag; // the 128-bit tag whose upper 64 bits hold the 4-bit descriptors
+	int nreg; // which descriptor nibble GetGIFReg() reads
+
+	DWORD GetGIFReg() {return (DWORD)GET_GIF_REG(tag, nreg);} // 4-bit descriptor number nreg of tag
+};
+
+#pragma pack(pop)
+
+enum {KEYPRESS=1, KEYRELEASE=2};
+struct keyEvent {UINT32 key, event;};
+
+enum {FREEZE_LOAD=0, FREEZE_SAVE=1, FREEZE_SIZE=2};
+struct freezeData {int size; BYTE* data;};
+
+enum stateType {ST_WRITE, ST_TRANSFER, ST_VSYNC}; 
diff --git a/gsdx/GSDirtyRect.cpp b/gsdx/GSDirtyRect.cpp
new file mode 100644
index 0000000..8f9480a
--- /dev/null
+++ b/gsdx/GSDirtyRect.cpp
@@ -0,0 +1,120 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSDirtyRect.h"
+
+GSDirtyRect::GSDirtyRect() // default state: format PSM_PSMCT32 and an all-zero rectangle
+	: m_psm(PSM_PSMCT32)
+	, m_rect(0, 0, 0, 0)
+{
+}
+
+GSDirtyRect::GSDirtyRect(DWORD psm, CRect rect)
+	: m_psm(psm) // format code the rectangle was recorded with
+	, m_rect(rect)
+{
+}
+
+CRect GSDirtyRect::GetDirtyRect(const GIFRegTEX0& TEX0)
+{
+	// Per-format size entry ("bs") for the format this rect was recorded in.
+	const CSize own = GSLocalMemory::m_psm[m_psm].bs;
+
+	if(m_psm != TEX0.PSM)
+	{
+		// Formats differ: rescale the stored rect by the ratio of the two sizes.
+		const CSize other = GSLocalMemory::m_psm[TEX0.PSM].bs;
+
+		return CRect(
+			MulDiv(m_rect.left, other.cx, own.cx), MulDiv(m_rect.top, other.cy, own.cy),
+			MulDiv(m_rect.right, other.cx, own.cx), MulDiv(m_rect.bottom, other.cy, own.cy));
+	}
+
+	// Same format: round left/top down and right/bottom up via masks (presumes power-of-two sizes).
+	const LONG maskX = own.cx - 1;
+	const LONG maskY = own.cy - 1;
+
+	return CRect(
+		m_rect.left & ~maskX, m_rect.top & ~maskY,
+		(m_rect.right + maskX) & ~maskX, (m_rect.bottom + maskY) & ~maskY);
+}
+
+//
+
+CRect GSDirtyRectList::GetDirtyRect(const GIFRegTEX0& TEX0)
+{
+	if(IsEmpty()) return CRect(0, 0, 0, 0); // nothing recorded: all-zero rect
+	CRect bounds(INT_MAX, INT_MAX, 0, 0);
+	for(POSITION it = GetHeadPosition(); it; )
+		bounds |= GetNext(it).GetDirtyRect(TEX0); // fold every entry's converted rect into the union
+	return bounds;
+}
+
+/*
+GSDirtyRectList::GSDirtyRectList()
+	: m_rects(NULL)
+	, m_count(0)
+	, m_maxcount(0)
+{
+}
+
+GSDirtyRectList::~GSDirtyRectList()
+{
+	delete [] m_rects;
+}
+
+void GSDirtyRectList::AddTail(const GSDirtyRect& r)
+{
+	if(m_count == m_maxcount)
+	{
+		m_maxcount = max(m_count, 8) * 3/2;
+
+		GSDirtyRect* rects = new GSDirtyRect[m_maxcount];
+
+		memcpy(rects, m_rects, m_count * sizeof(GSDirtyRect));
+
+		delete [] m_rects;
+
+		m_rects = rects;
+		
+	}
+
+	m_rects[m_count++] = r;
+}
+
+CRect GSDirtyRectList::GetDirtyRect(const GIFRegTEX0& TEX0)
+{
+	if(m_count == 0)
+	{
+		return CRect(0, 0, 0, 0);
+	}
+
+	CRect r(INT_MAX, INT_MAX, 0, 0);
+
+	for(size_t i = 0; i < m_count; i++) 
+	{
+		r |= m_rects[i].GetDirtyRect(TEX0);
+	}
+
+	return r;
+}
+*/
\ No newline at end of file
diff --git a/gsdx/GSDirtyRect.h b/gsdx/GSDirtyRect.h
new file mode 100644
index 0000000..70371b2
--- /dev/null
+++ b/gsdx/GSDirtyRect.h
@@ -0,0 +1,42 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSLocalMemory.h"
+
+class GSDirtyRect // one rectangle tagged with the format code it was recorded in
+{
+	DWORD m_psm; // format code; used as an index into GSLocalMemory::m_psm
+	CRect m_rect; // the rectangle as recorded for m_psm (NOTE(review): units not visible here - confirm)
+
+public:
+	GSDirtyRect(); // PSM_PSMCT32 with an all-zero rectangle
+	GSDirtyRect(DWORD psm, CRect rect);
+	CRect GetDirtyRect(const GIFRegTEX0& TEX0); // rect aligned for m_psm, or rescaled when TEX0.PSM differs from m_psm
+};
+
+class GSDirtyRectList : public CAtlList<GSDirtyRect> // ATL list of GSDirtyRect entries
+{
+public:
+	GSDirtyRectList() {}
+	CRect GetDirtyRect(const GIFRegTEX0& TEX0); // union of every entry's GetDirtyRect(TEX0); all-zero rect when the list is empty
+};
\ No newline at end of file
diff --git a/gsdx/GSDrawingContext.h b/gsdx/GSDrawingContext.h
new file mode 100644
index 0000000..8a3f44f
--- /dev/null
+++ b/gsdx/GSDrawingContext.h
@@ -0,0 +1,62 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GS.h"
+#include "GSLocalMemory.h"
+
+#pragma pack(push, 1)
+
+struct GSDrawingContext // register state of one drawing context plus values derived from it
+{
+	GSDrawingContext() {memset(this, 0, sizeof(*this));} // zero-fills every member below (registers, pointers, scissor)
+	// context registers; the GIFReg* types are declared outside this header
+	GIFRegXYOFFSET	XYOFFSET;
+	GIFRegTEX0		TEX0;
+	GIFRegTEX1		TEX1;
+	GIFRegTEX2		TEX2;
+	GIFRegCLAMP		CLAMP;
+	GIFRegMIPTBP1	MIPTBP1;
+	GIFRegMIPTBP2	MIPTBP2;
+	GIFRegSCISSOR	SCISSOR;
+	GIFRegALPHA		ALPHA;
+	GIFRegTEST		TEST;
+	GIFRegFBA		FBA;
+	GIFRegFRAME		FRAME;
+	GIFRegZBUF		ZBUF;
+	// psm_t pointers, never assigned in this header; presumably the entries for the FRAME/ZBUF/TEX0 formats -- TODO confirm
+	GSLocalMemory::psm_t* ftbl;
+	GSLocalMemory::psm_t* ztbl;
+	GSLocalMemory::psm_t* ttbl;
+	// written only by UpdateScissor()
+	struct {float x0, y0, x1, y1;} scissor;
+	// recomputes scissor from the registers: (SCISSOR.SCA* << 4) + XYOFFSET.OF*, done in int and then converted to float
+	void UpdateScissor()
+	{
+		scissor.x0 = (float)(int)((int)(SCISSOR.SCAX0 << 4) + (int)XYOFFSET.OFX);
+		scissor.y0 = (float)(int)((int)(SCISSOR.SCAY0 << 4) + (int)XYOFFSET.OFY);
+		scissor.x1 = (float)(int)((int)(SCISSOR.SCAX1 << 4) + (int)XYOFFSET.OFX);
+		scissor.y1 = (float)(int)((int)(SCISSOR.SCAY1 << 4) + (int)XYOFFSET.OFY);
+	}
+};
+
+#pragma pack(pop)
\ No newline at end of file
diff --git a/gsdx/GSDrawingEnvironment.h b/gsdx/GSDrawingEnvironment.h
new file mode 100644
index 0000000..e963bb4
--- /dev/null
+++ b/gsdx/GSDrawingEnvironment.h
@@ -0,0 +1,51 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GS.h"
+
+#pragma pack(push, 1)
+
+struct GSDrawingEnvironment // the registers that are not per-context, followed by the two drawing contexts
+{
+	GSDrawingEnvironment() {memset(this, 0, sizeof(*this));} // zero-fills everything, including both CTXT entries
+	// GIFReg* types are declared outside this header
+	GIFRegPRIM			PRIM;
+	GIFRegPRMODE		PRMODE;
+	GIFRegPRMODECONT	PRMODECONT;
+	GIFRegTEXCLUT		TEXCLUT;
+	GIFRegSCANMSK		SCANMSK;
+	GIFRegTEXA			TEXA;
+	GIFRegFOGCOL		FOGCOL;
+	GIFRegDIMX			DIMX;
+	GIFRegDTHE			DTHE;
+	GIFRegCOLCLAMP		COLCLAMP;
+	GIFRegPABE			PABE;
+	GIFRegBITBLTBUF		BITBLTBUF;
+	GIFRegTRXDIR		TRXDIR;
+	GIFRegTRXPOS		TRXPOS;
+	GIFRegTRXREG		TRXREG;
+	GIFRegTRXREG		TRXREG2;
+	GSDrawingContext	CTXT[2];
+};
+
+#pragma pack(pop)
diff --git a/gsdx/GSLocalMemory.cpp b/gsdx/GSLocalMemory.cpp
new file mode 100644
index 0000000..053e3c6
--- /dev/null
+++ b/gsdx/GSLocalMemory.cpp
@@ -0,0 +1,1937 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ *	Special Notes: 
+ *
+ *	Based on Page.c from GSSoft
+ *	Copyright (C) 2002-2004 GSsoft Team
+ *
+ */
+ 
+#include "StdAfx.h"
+#include "GSLocalMemory.h"
+#include "x86.h"
+
+#define ASSERT_BLOCK(r, w, h) /* asserts r is at least w x h and that all four edges are multiples of w / h (the masks need power-of-two w, h) */ \
+	ASSERT((r).Width() >= w && (r).Height() >= h && !((r).left&(w-1)) && !((r).top&(h-1)) && !((r).right&(w-1)) && !((r).bottom&(h-1))); \
+
+#if defined(_M_AMD64) || _M_IX86_FP >= 2 // prefetching is compiled in only for x64 or SSE2 builds
+#define BLOCK_PREFETCH(mem) /* eight T0 prefetches at mem[0], mem[32], ... mem[224]; mem is a const char* in the users below */ \
+	_mm_prefetch(&mem[16*0], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*2], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*4], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*6], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*8], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*10], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*12], _MM_HINT_T0); \
+	_mm_prefetch(&mem[16*14], _MM_HINT_T0); \
+
+#define BLOCK_PREFETCH_32(x, y, w) {const char* next = (const char*)&m_vm32[blockAddress32(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);} // each variant prefetches the block at (x + w, y); all of them expect a TEX0 in scope
+#define BLOCK_PREFETCH_16(x, y, w) {const char* next = (const char*)&m_vm16[blockAddress16(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);}
+#define BLOCK_PREFETCH_16S(x, y, w) {const char* next = (const char*)&m_vm16[blockAddress16S(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);}
+#define BLOCK_PREFETCH_8(x, y, w) {const char* next = (const char*)&m_vm8[blockAddress8(x + (w), y, TEX0.TBP0, TEX0.TBW)]; BLOCK_PREFETCH(next);}
+#define BLOCK_PREFETCH_4(x, y, w) {const char* next = (const char*)&m_vm8[blockAddress4(x + (w), y, TEX0.TBP0, TEX0.TBW)>>1]; BLOCK_PREFETCH(next);} // blockAddress4 result is halved before indexing the byte array
+#else // no SSE2: the prefetch macros expand to nothing
+#define BLOCK_PREFETCH_32(x, y, w)
+#define BLOCK_PREFETCH_16(x, y, w)
+#define BLOCK_PREFETCH_16S(x, y, w)
+#define BLOCK_PREFETCH_8(x, y, w)
+#define BLOCK_PREFETCH_4(x, y, w)
+#endif
+
+#define FOREACH_BLOCK_START(r, w, h, t) /* opens two loops over the w x h blocks of r; t picks BLOCK_PREFETCH_<t>; expects dst, dstpitch and TEX0 in scope */ \
+	for(int y = (r).top; y < (r).bottom; y += (h)) \
+	{ 	ASSERT_BLOCK(r, w, h); \
+		BYTE* ptr = dst + (y-(r).top)*dstpitch; /* start of the destination row for this block row */ \
+		for(int x = (r).left; x < (r).right; x += (w)) \
+		{ \
+			BLOCK_PREFETCH_##t(x + (w), y, w) /* a single ##: pasting the name with '(' is not a valid token */ \
+
+#define FOREACH_BLOCK_END }}
+
+//
+
+DWORD GSLocalMemory::pageOffset32[32][32][64]; // pageOffset* tables are indexed [bp][y][x]; the constructor fills them from pixelAddressOrg*(x, y, bp, 0)
+DWORD GSLocalMemory::pageOffset32Z[32][32][64];
+DWORD GSLocalMemory::pageOffset16[32][64][64];
+DWORD GSLocalMemory::pageOffset16S[32][64][64];
+DWORD GSLocalMemory::pageOffset16Z[32][64][64];
+DWORD GSLocalMemory::pageOffset16SZ[32][64][64];
+DWORD GSLocalMemory::pageOffset8[32][64][128];
+DWORD GSLocalMemory::pageOffset4[32][128][128];
+
+int GSLocalMemory::rowOffset32[2048]; // rowOffset*[x] = pixelAddress*(x, y, 0, 32) - pixelAddress*(0, y, 0, 32), filled by the constructor with y = 0
+int GSLocalMemory::rowOffset32Z[2048];
+int GSLocalMemory::rowOffset16[2048];
+int GSLocalMemory::rowOffset16S[2048];
+int GSLocalMemory::rowOffset16Z[2048];
+int GSLocalMemory::rowOffset16SZ[2048];
+int GSLocalMemory::rowOffset8[2][2048]; // [0] is computed with y = 0, [1] with y = 2
+int GSLocalMemory::rowOffset4[2][2048]; // [0] is computed with y = 0, [1] with y = 2
+
+//
+
+DWORD GSLocalMemory::m_xtbl[1024]; // m_xtbl / m_ytbl are not written by the constructor below
+DWORD GSLocalMemory::m_ytbl[1024]; 
+
+//
+
+GSLocalMemory::psm_t GSLocalMemory::m_psm[64]; // per-format handler table indexed by PSM value; populated by the constructor
+
+//
+
+GSLocalMemory::GSLocalMemory() // allocates the memory buffers and (re)builds the static offset and m_psm tables
+	: m_fCLUTMayBeDirty(true)
+{
+	int len = 1024*1024*4*2; // *2 for safety...
+
+	m_vm8 = (BYTE*)_aligned_malloc(len, 16); // 8 MB, 16-byte aligned; NOTE(review): the result is not checked before the memset
+
+	memset(m_vm8, 0, len);
+
+	m_pCLUT = (WORD*)_aligned_malloc(256*2*sizeof(WORD)*2, 16); // the three CLUT buffers are allocated but not cleared here
+	m_pCLUT32 = (DWORD*)_aligned_malloc(256*sizeof(DWORD), 16);
+	m_pCLUT64 = (UINT64*)_aligned_malloc(256*sizeof(UINT64), 16);
+
+	for(int bp = 0; bp < 32; bp++) // pageOffset*[bp][y][x] = pixelAddressOrg*(x, y, bp, 0) over each table's y/x extent
+	{
+		for(int y = 0; y < 32; y++) for(int x = 0; x < 64; x++)
+		{
+			pageOffset32[bp][y][x] = pixelAddressOrg32(x, y, bp, 0);
+			pageOffset32Z[bp][y][x] = pixelAddressOrg32Z(x, y, bp, 0);
+		}
+
+		for(int y = 0; y < 64; y++) for(int x = 0; x < 64; x++) 
+		{
+			pageOffset16[bp][y][x] = pixelAddressOrg16(x, y, bp, 0);
+			pageOffset16S[bp][y][x] = pixelAddressOrg16S(x, y, bp, 0);
+			pageOffset16Z[bp][y][x] = pixelAddressOrg16Z(x, y, bp, 0);
+			pageOffset16SZ[bp][y][x] = pixelAddressOrg16SZ(x, y, bp, 0);
+		}
+
+		for(int y = 0; y < 64; y++) for(int x = 0; x < 128; x++)
+		{
+			pageOffset8[bp][y][x] = pixelAddressOrg8(x, y, bp, 0);
+		}
+
+		for(int y = 0; y < 128; y++) for(int x = 0; x < 128; x++)
+		{
+			pageOffset4[bp][y][x] = pixelAddressOrg4(x, y, bp, 0);
+		}
+	}
+
+	{ // rowOffset*[x]: address of pixel x minus address of pixel 0 on the same row, with bp = 0 and bw = 32
+		for(int x = 0; x < countof(rowOffset32); x++)
+			rowOffset32[x] = (int)pixelAddress32(x, 0, 0, 32) - (int)pixelAddress32(0, 0, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset32Z); x++)
+			rowOffset32Z[x] = (int)pixelAddress32Z(x, 0, 0, 32) - (int)pixelAddress32Z(0, 0, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset16); x++)
+			rowOffset16[x] = (int)pixelAddress16(x, 0, 0, 32) - (int)pixelAddress16(0, 0, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset16S); x++)
+			rowOffset16S[x] = (int)pixelAddress16S(x, 0, 0, 32) - (int)pixelAddress16S(0, 0, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset16Z); x++)
+			rowOffset16Z[x] = (int)pixelAddress16Z(x, 0, 0, 32) - (int)pixelAddress16Z(0, 0, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset16SZ); x++)
+			rowOffset16SZ[x] = (int)pixelAddress16SZ(x, 0, 0, 32) - (int)pixelAddress16SZ(0, 0, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset8[0]); x++) // the comma operator keeps both assignments inside the loop body
+			rowOffset8[0][x] = (int)pixelAddress8(x, 0, 0, 32) - (int)pixelAddress8(0, 0, 0, 32),
+			rowOffset8[1][x] = (int)pixelAddress8(x, 2, 0, 32) - (int)pixelAddress8(0, 2, 0, 32);
+
+		for(int x = 0; x < countof(rowOffset4[0]); x++) // same pattern as the 8-bit table above
+			rowOffset4[0][x] = (int)pixelAddress4(x, 0, 0, 32) - (int)pixelAddress4(0, 0, 0, 32),
+			rowOffset4[1][x] = (int)pixelAddress4(x, 2, 0, 32) - (int)pixelAddress4(0, 2, 0, 32);
+	}
+
+	for(int i = 0; i < countof(m_psm); i++) // every entry starts with the 32-bit handlers and geometry; the statements after this loop override individual formats
+	{
+		m_psm[i].pa = &GSLocalMemory::pixelAddress32;
+		m_psm[i].ba = &GSLocalMemory::blockAddress32;
+		m_psm[i].pga = &GSLocalMemory::pageAddress32;
+		m_psm[i].rp = &GSLocalMemory::readPixel32;
+		m_psm[i].rpa = &GSLocalMemory::readPixel32;
+		m_psm[i].wp = &GSLocalMemory::writePixel32;
+		m_psm[i].wpa = &GSLocalMemory::writePixel32;
+		m_psm[i].rt = &GSLocalMemory::readTexel32;
+		m_psm[i].rtNP = &GSLocalMemory::readTexel32;
+		m_psm[i].rtP = &GSLocalMemory::readTexel32;
+		m_psm[i].rta = &GSLocalMemory::readTexel32;
+		m_psm[i].wfa = &GSLocalMemory::writePixel32;
+		m_psm[i].st = &GSLocalMemory::SwizzleTexture32;
+		m_psm[i].ust = &GSLocalMemory::unSwizzleTexture32;
+		m_psm[i].ustP = &GSLocalMemory::unSwizzleTexture32;
+		m_psm[i].ustNP = &GSLocalMemory::unSwizzleTexture32;
+		m_psm[i].bpp = m_psm[i].trbpp = 32;
+		m_psm[i].pal = 0;
+		m_psm[i].bs = CSize(8, 8);
+		m_psm[i].pgs = CSize(64, 32);
+		for(int j = 0; j < 8; j++) m_psm[i].rowOffset[j] = rowOffset32;
+	}
+
+	m_psm[PSM_PSMCT16].pa = &GSLocalMemory::pixelAddress16; // pa: pixel address function per format
+	m_psm[PSM_PSMCT16S].pa = &GSLocalMemory::pixelAddress16S;
+	m_psm[PSM_PSMT8].pa = &GSLocalMemory::pixelAddress8;
+	m_psm[PSM_PSMT4].pa = &GSLocalMemory::pixelAddress4;
+	m_psm[PSM_PSMZ32].pa = &GSLocalMemory::pixelAddress32Z;
+	m_psm[PSM_PSMZ24].pa = &GSLocalMemory::pixelAddress32Z;
+	m_psm[PSM_PSMZ16].pa = &GSLocalMemory::pixelAddress16Z;
+	m_psm[PSM_PSMZ16S].pa = &GSLocalMemory::pixelAddress16SZ;
+
+	m_psm[PSM_PSMCT16].ba = &GSLocalMemory::blockAddress16; // ba: block address function per format
+	m_psm[PSM_PSMCT16S].ba = &GSLocalMemory::blockAddress16S;
+	m_psm[PSM_PSMT8].ba = &GSLocalMemory::blockAddress8;
+	m_psm[PSM_PSMT4].ba = &GSLocalMemory::blockAddress4;
+	m_psm[PSM_PSMZ32].ba = &GSLocalMemory::blockAddress32Z;
+	m_psm[PSM_PSMZ24].ba = &GSLocalMemory::blockAddress32Z;
+	m_psm[PSM_PSMZ16].ba = &GSLocalMemory::blockAddress16Z;
+	m_psm[PSM_PSMZ16S].ba = &GSLocalMemory::blockAddress16SZ;
+
+	m_psm[PSM_PSMCT16].pga = &GSLocalMemory::pageAddress16; // pga: page address function; all four 16-bit formats share pageAddress16
+	m_psm[PSM_PSMCT16S].pga = &GSLocalMemory::pageAddress16;
+	m_psm[PSM_PSMZ16].pga = &GSLocalMemory::pageAddress16;
+	m_psm[PSM_PSMZ16S].pga = &GSLocalMemory::pageAddress16;
+	m_psm[PSM_PSMT8].pga = &GSLocalMemory::pageAddress8;
+	m_psm[PSM_PSMT4].pga = &GSLocalMemory::pageAddress4;
+
+	m_psm[PSM_PSMCT24].rp = &GSLocalMemory::readPixel24; // rp: pixel readers
+	m_psm[PSM_PSMCT16].rp = &GSLocalMemory::readPixel16;
+	m_psm[PSM_PSMCT16S].rp = &GSLocalMemory::readPixel16S;
+	m_psm[PSM_PSMT8].rp = &GSLocalMemory::readPixel8;
+	m_psm[PSM_PSMT4].rp = &GSLocalMemory::readPixel4;
+	m_psm[PSM_PSMT8H].rp = &GSLocalMemory::readPixel8H;
+	m_psm[PSM_PSMT4HL].rp = &GSLocalMemory::readPixel4HL;
+	m_psm[PSM_PSMT4HH].rp = &GSLocalMemory::readPixel4HH;
+	m_psm[PSM_PSMZ32].rp = &GSLocalMemory::readPixel32Z;
+	m_psm[PSM_PSMZ24].rp = &GSLocalMemory::readPixel24Z;
+	m_psm[PSM_PSMZ16].rp = &GSLocalMemory::readPixel16Z;
+	m_psm[PSM_PSMZ16S].rp = &GSLocalMemory::readPixel16SZ;
+
+	m_psm[PSM_PSMCT24].rpa = &GSLocalMemory::readPixel24; // rpa: assigned the same function names as rp (presumably different overloads -- TODO confirm)
+	m_psm[PSM_PSMCT16].rpa = &GSLocalMemory::readPixel16;
+	m_psm[PSM_PSMCT16S].rpa = &GSLocalMemory::readPixel16S;
+	m_psm[PSM_PSMT8].rpa = &GSLocalMemory::readPixel8;
+	m_psm[PSM_PSMT4].rpa = &GSLocalMemory::readPixel4;
+	m_psm[PSM_PSMT8H].rpa = &GSLocalMemory::readPixel8H;
+	m_psm[PSM_PSMT4HL].rpa = &GSLocalMemory::readPixel4HL;
+	m_psm[PSM_PSMT4HH].rpa = &GSLocalMemory::readPixel4HH;
+	m_psm[PSM_PSMZ32].rpa = &GSLocalMemory::readPixel32Z;
+	m_psm[PSM_PSMZ24].rpa = &GSLocalMemory::readPixel24Z;
+	m_psm[PSM_PSMZ16].rpa = &GSLocalMemory::readPixel16Z;
+	m_psm[PSM_PSMZ16S].rpa = &GSLocalMemory::readPixel16SZ;
+
+	m_psm[PSM_PSMCT32].wp = &GSLocalMemory::writePixel32; // wp: pixel writers
+	m_psm[PSM_PSMCT24].wp = &GSLocalMemory::writePixel24;
+	m_psm[PSM_PSMCT16].wp = &GSLocalMemory::writePixel16;
+	m_psm[PSM_PSMCT16S].wp = &GSLocalMemory::writePixel16S;
+	m_psm[PSM_PSMT8].wp = &GSLocalMemory::writePixel8;
+	m_psm[PSM_PSMT4].wp = &GSLocalMemory::writePixel4;
+	m_psm[PSM_PSMT8H].wp = &GSLocalMemory::writePixel8H;
+	m_psm[PSM_PSMT4HL].wp = &GSLocalMemory::writePixel4HL;
+	m_psm[PSM_PSMT4HH].wp = &GSLocalMemory::writePixel4HH;
+	m_psm[PSM_PSMZ32].wp = &GSLocalMemory::writePixel32Z;
+	m_psm[PSM_PSMZ24].wp = &GSLocalMemory::writePixel24Z;
+	m_psm[PSM_PSMZ16].wp = &GSLocalMemory::writePixel16Z;
+	m_psm[PSM_PSMZ16S].wp = &GSLocalMemory::writePixel16SZ;
+
+	m_psm[PSM_PSMCT32].wpa = &GSLocalMemory::writePixel32; // wpa: assigned the same function names as wp (presumably different overloads -- TODO confirm)
+	m_psm[PSM_PSMCT24].wpa = &GSLocalMemory::writePixel24;
+	m_psm[PSM_PSMCT16].wpa = &GSLocalMemory::writePixel16;
+	m_psm[PSM_PSMCT16S].wpa = &GSLocalMemory::writePixel16S;
+	m_psm[PSM_PSMT8].wpa = &GSLocalMemory::writePixel8;
+	m_psm[PSM_PSMT4].wpa = &GSLocalMemory::writePixel4;
+	m_psm[PSM_PSMT8H].wpa = &GSLocalMemory::writePixel8H;
+	m_psm[PSM_PSMT4HL].wpa = &GSLocalMemory::writePixel4HL;
+	m_psm[PSM_PSMT4HH].wpa = &GSLocalMemory::writePixel4HH;
+	m_psm[PSM_PSMZ32].wpa = &GSLocalMemory::writePixel32Z;
+	m_psm[PSM_PSMZ24].wpa = &GSLocalMemory::writePixel24Z;
+	m_psm[PSM_PSMZ16].wpa = &GSLocalMemory::writePixel16Z;
+	m_psm[PSM_PSMZ16S].wpa = &GSLocalMemory::writePixel16SZ;
+
+	m_psm[PSM_PSMCT24].rt = &GSLocalMemory::readTexel24; // rt: texel readers
+	m_psm[PSM_PSMCT16].rt = &GSLocalMemory::readTexel16;
+	m_psm[PSM_PSMCT16S].rt = &GSLocalMemory::readTexel16S;
+	m_psm[PSM_PSMT8].rt = &GSLocalMemory::readTexel8;
+	m_psm[PSM_PSMT4].rt = &GSLocalMemory::readTexel4;
+	m_psm[PSM_PSMT8H].rt = &GSLocalMemory::readTexel8H;
+	m_psm[PSM_PSMT4HL].rt = &GSLocalMemory::readTexel4HL;
+	m_psm[PSM_PSMT4HH].rt = &GSLocalMemory::readTexel4HH;
+
+	m_psm[PSM_PSMCT24].rta = &GSLocalMemory::readTexel24; // rta: assigned the same function names as rt
+	m_psm[PSM_PSMCT16].rta = &GSLocalMemory::readTexel16;
+	m_psm[PSM_PSMCT16S].rta = &GSLocalMemory::readTexel16S;
+	m_psm[PSM_PSMT8].rta = &GSLocalMemory::readTexel8;
+	m_psm[PSM_PSMT4].rta = &GSLocalMemory::readTexel4;
+	m_psm[PSM_PSMT8H].rta = &GSLocalMemory::readTexel8H;
+	m_psm[PSM_PSMT4HL].rta = &GSLocalMemory::readTexel4HL;
+	m_psm[PSM_PSMT4HH].rta = &GSLocalMemory::readTexel4HH;
+
+	m_psm[PSM_PSMCT24].wfa = &GSLocalMemory::writePixel24; // wfa: frame writers; the 16-bit formats use writeFrame16*
+	m_psm[PSM_PSMCT16].wfa = &GSLocalMemory::writeFrame16;
+	m_psm[PSM_PSMCT16S].wfa = &GSLocalMemory::writeFrame16S;
+
+	m_psm[PSM_PSMCT16].rtP = &GSLocalMemory::readTexel16P; // rtP: the P-suffixed texel readers
+	m_psm[PSM_PSMCT16S].rtP = &GSLocalMemory::readTexel16SP;
+	m_psm[PSM_PSMT8].rtP = &GSLocalMemory::readTexel8P;
+	m_psm[PSM_PSMT4].rtP = &GSLocalMemory::readTexel4P;
+	m_psm[PSM_PSMT8H].rtP = &GSLocalMemory::readTexel8HP;
+	m_psm[PSM_PSMT4HL].rtP = &GSLocalMemory::readTexel4HLP;
+	m_psm[PSM_PSMT4HH].rtP = &GSLocalMemory::readTexel4HHP;
+
+	m_psm[PSM_PSMCT16].rtNP = &GSLocalMemory::readTexel16P; // rtNP: P readers for the 16-bit formats, plain readers for the indexed ones
+	m_psm[PSM_PSMCT16S].rtNP = &GSLocalMemory::readTexel16SP;
+	m_psm[PSM_PSMT8].rtNP = &GSLocalMemory::readTexel8;
+	m_psm[PSM_PSMT4].rtNP = &GSLocalMemory::readTexel4;
+	m_psm[PSM_PSMT8H].rtNP = &GSLocalMemory::readTexel8H;
+	m_psm[PSM_PSMT4HL].rtNP = &GSLocalMemory::readTexel4HL;
+	m_psm[PSM_PSMT4HH].rtNP = &GSLocalMemory::readTexel4HH;
+
+	m_psm[PSM_PSMCT24].st = &GSLocalMemory::SwizzleTexture24; // st: SwizzleTexture* functions
+	m_psm[PSM_PSMCT16].st = &GSLocalMemory::SwizzleTexture16;
+	m_psm[PSM_PSMCT16S].st = &GSLocalMemory::SwizzleTexture16S;
+	m_psm[PSM_PSMT8].st = &GSLocalMemory::SwizzleTexture8;
+	m_psm[PSM_PSMT4].st = &GSLocalMemory::SwizzleTexture4;
+	m_psm[PSM_PSMT8H].st = &GSLocalMemory::SwizzleTexture8H;
+	m_psm[PSM_PSMT4HL].st = &GSLocalMemory::SwizzleTexture4HL;
+	m_psm[PSM_PSMT4HH].st = &GSLocalMemory::SwizzleTexture4HH;
+
+	m_psm[PSM_PSMCT24].ust = &GSLocalMemory::unSwizzleTexture24; // ust: unSwizzleTexture* functions
+	m_psm[PSM_PSMCT16].ust = &GSLocalMemory::unSwizzleTexture16;
+	m_psm[PSM_PSMCT16S].ust = &GSLocalMemory::unSwizzleTexture16S;
+	m_psm[PSM_PSMT8].ust = &GSLocalMemory::unSwizzleTexture8;
+	m_psm[PSM_PSMT4].ust = &GSLocalMemory::unSwizzleTexture4;
+	m_psm[PSM_PSMT8H].ust = &GSLocalMemory::unSwizzleTexture8H;
+	m_psm[PSM_PSMT4HL].ust = &GSLocalMemory::unSwizzleTexture4HL;
+	m_psm[PSM_PSMT4HH].ust = &GSLocalMemory::unSwizzleTexture4HH;
+
+	m_psm[PSM_PSMCT16].ustP = &GSLocalMemory::unSwizzleTexture16P; // ustP: the P-suffixed unswizzlers
+	m_psm[PSM_PSMCT16S].ustP = &GSLocalMemory::unSwizzleTexture16SP;
+	m_psm[PSM_PSMT8].ustP = &GSLocalMemory::unSwizzleTexture8P;
+	m_psm[PSM_PSMT4].ustP = &GSLocalMemory::unSwizzleTexture4P;
+	m_psm[PSM_PSMT8H].ustP = &GSLocalMemory::unSwizzleTexture8HP;
+	m_psm[PSM_PSMT4HL].ustP = &GSLocalMemory::unSwizzleTexture4HLP;
+	m_psm[PSM_PSMT4HH].ustP = &GSLocalMemory::unSwizzleTexture4HHP;
+
+	m_psm[PSM_PSMCT16].ustNP = &GSLocalMemory::unSwizzleTexture16P; // ustNP: P unswizzlers for the 16-bit formats, NP ones for the indexed formats
+	m_psm[PSM_PSMCT16S].ustNP = &GSLocalMemory::unSwizzleTexture16SP;
+	m_psm[PSM_PSMT8].ustNP = &GSLocalMemory::unSwizzleTexture8NP;
+	m_psm[PSM_PSMT4].ustNP = &GSLocalMemory::unSwizzleTexture4NP;
+	m_psm[PSM_PSMT8H].ustNP = &GSLocalMemory::unSwizzleTexture8HNP;
+	m_psm[PSM_PSMT4HL].ustNP = &GSLocalMemory::unSwizzleTexture4HLNP;
+	m_psm[PSM_PSMT4HH].ustNP = &GSLocalMemory::unSwizzleTexture4HHNP;
+
+	m_psm[PSM_PSMT8].pal = m_psm[PSM_PSMT8H].pal = 256; // pal: number of palette entries (WriteCLUT reads it as nPaletteEntries)
+	m_psm[PSM_PSMT4].pal = m_psm[PSM_PSMT4HL].pal = m_psm[PSM_PSMT4HH].pal = 16;
+
+	m_psm[PSM_PSMCT16].bpp = m_psm[PSM_PSMCT16S].bpp = 16; // bpp: FillRect switches on this value
+	m_psm[PSM_PSMT8].bpp = 8;
+	m_psm[PSM_PSMT4].bpp = 4;
+	m_psm[PSM_PSMZ16].bpp = m_psm[PSM_PSMZ16S].bpp = 16;
+
+	m_psm[PSM_PSMCT24].trbpp = 24; // trbpp: presumably bits per pixel of transfer data -- TODO confirm
+	m_psm[PSM_PSMCT16].trbpp = m_psm[PSM_PSMCT16S].trbpp = 16;
+	m_psm[PSM_PSMT8].trbpp = m_psm[PSM_PSMT8H].trbpp = 8;
+	m_psm[PSM_PSMT4].trbpp = m_psm[PSM_PSMT4HL].trbpp = m_psm[PSM_PSMT4HH].trbpp = 4;
+	m_psm[PSM_PSMZ24].trbpp = 24;
+	m_psm[PSM_PSMZ16].trbpp = m_psm[PSM_PSMZ16S].trbpp = 16;
+
+	m_psm[PSM_PSMCT16].bs = m_psm[PSM_PSMCT16S].bs = CSize(16, 8); // bs: block size in pixels (FillRect uses it as its block width/height)
+	m_psm[PSM_PSMT8].bs = CSize(16, 16);
+	m_psm[PSM_PSMT4].bs = CSize(32, 32);
+	m_psm[PSM_PSMZ16].bs = m_psm[PSM_PSMZ16S].bs = CSize(16, 8);
+
+	m_psm[PSM_PSMCT16].pgs = m_psm[PSM_PSMCT16S].pgs = CSize(64, 64); // pgs: presumably page size in pixels -- TODO confirm
+	m_psm[PSM_PSMT8].pgs = CSize(128, 64);
+	m_psm[PSM_PSMT4].pgs = CSize(128, 128);
+	m_psm[PSM_PSMZ16].pgs = m_psm[PSM_PSMZ16S].pgs = CSize(64, 64);
+
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMCT16].rowOffset[i] = rowOffset16; // rowOffset[0..7]: one table pointer per slot
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMCT16S].rowOffset[i] = rowOffset16S;
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMT8].rowOffset[i] = rowOffset8[((i+2)>>2)&1]; // slots 2..5 use table [1], slots 0, 1, 6, 7 use table [0]
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMT4].rowOffset[i] = rowOffset4[((i+2)>>2)&1]; // same slot pattern as PSMT8
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ32].rowOffset[i] = rowOffset32Z;
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ24].rowOffset[i] = rowOffset32Z;
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16].rowOffset[i] = rowOffset16Z;
+	for(int i = 0; i < 8; i++) m_psm[PSM_PSMZ16S].rowOffset[i] = rowOffset16SZ;
+}
+
+GSLocalMemory::~GSLocalMemory()
+{
+	// release the four buffers obtained with _aligned_malloc in the constructor, in allocation order
+	void* buffers[] = {m_vm8, m_pCLUT, m_pCLUT32, m_pCLUT64};
+
+	for(int i = 0; i < countof(buffers); i++) _aligned_free(buffers[i]);
+}
+
+////////////////////
+
+bool GSLocalMemory::FillRect(const CRect& r, DWORD c, DWORD psm, DWORD fbp, DWORD fbw)
+{
+	// Fills r with c for format psm at fbp/fbw: whole blocks are stored directly, the border goes through the pixel writer. Always returns true.
+	const psm_t& tbl = m_psm[psm];
+
+	writePixel wp = tbl.wp;
+	pixelAddress ba = tbl.ba;
+
+	int w = tbl.bs.cx;
+	int h = tbl.bs.cy;
+	int shift = 0;
+
+	switch(tbl.bpp) // replicate c across a DWORD for memsetd; shift turns (ba() << 2) into a byte offset
+	{
+	case 32: shift = 0; break;
+	case 16: shift = 1; c = (c&0xffff)*0x00010001; break;
+	case 8: shift = 2; c = (c&0xff)*0x01010101; break;
+	case 4: shift = 3; c = (c&0xf)*0x11111111; break;
+	}
+
+	CRect clip((r.left+(w-1))&~(w-1), (r.top+(h-1))&~(h-1), r.right&~(w-1), r.bottom&~(h-1)); // r shrunk to whole blocks
+
+	if(clip.left >= clip.right || clip.top >= clip.bottom)
+	{
+		// r holds no whole block, so clip is empty or inverted and the border loops below would write outside r
+		for(int y = r.top; y < r.bottom; y++)
+			for(int x = r.left; x < r.right; x++)
+				(this->*wp)(x, y, c, fbp, fbw);
+
+		return(true);
+	}
+
+	for(int y = r.top; y < clip.top; y++) // rows above the block-aligned area
+		for(int x = r.left; x < r.right; x++)
+			(this->*wp)(x, y, c, fbp, fbw);
+
+	for(int y = clip.top; y < clip.bottom; y++) // columns left and right of the block-aligned area
+	{
+		for(int x = r.left; x < clip.left; x++)
+			(this->*wp)(x, y, c, fbp, fbw);
+		for(int x = clip.right; x < r.right; x++)
+			(this->*wp)(x, y, c, fbp, fbw);
+	}
+
+	if(psm == PSM_PSMCT24 || psm == PSM_PSMZ24)
+	{
+		c &= 0x00ffffff; // 24-bit formats keep the top byte of every DWORD
+
+		for(int y = clip.top; y < clip.bottom; y += h)
+			for(int x = clip.left; x < clip.right; x += w)
+			{
+				DWORD* p = &m_vm32[ba(x, y, fbp, fbw)];
+				for(int i = 0; i < 64; i++) p[i] = (p[i] & 0xff000000) | c;
+			}
+	}
+	else
+	{
+		for(int y = clip.top; y < clip.bottom; y += h)
+			for(int x = clip.left; x < clip.right; x += w)
+				memsetd(&m_vm8[ba(x, y, fbp, fbw) << 2 >> shift], c, 64); // one block = 64 DWORDs
+	}
+
+	for(int y = clip.bottom; y < r.bottom; y++) // rows below the block-aligned area
+		for(int x = r.left; x < r.right; x++)
+			(this->*wp)(x, y, c, fbp, fbw);
+
+	return(true);
+}
+
+////////////////////
+
+bool GSLocalMemory::IsCLUTDirty(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT)
+{
+	return !(!m_fCLUTMayBeDirty && m_prevTEX0.i64 == TEX0.i64 && m_prevTEXCLUT.i64 == TEXCLUT.i64); // clean only when the flag is clear and both registers match the previous load
+}
+
+bool GSLocalMemory::WriteCLUT(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT) // loads the CLUT selected by TEX0/TEXCLUT into m_pCLUT; returns true if a load was started, false if it was skipped
+{
+	switch(TEX0.CLD) // load control: decides whether to load and which remembered CBP to update
+	{
+	default:
+	case 0: return false;
+	case 1: break;
+	case 2: m_CBP[0] = TEX0.CBP; break;
+	case 3: m_CBP[1] = TEX0.CBP; break;
+	case 4: if(m_CBP[0] == TEX0.CBP) return false; m_CBP[0] = TEX0.CBP; break; // load only if CBP differs from m_CBP[0], then remember it; must not fall into case 5
+	case 5: if(m_CBP[1] == TEX0.CBP) return false; m_CBP[1] = TEX0.CBP; break; // load only if CBP differs from m_CBP[1], then remember it
+	}
+
+	if(!IsCLUTDirty(TEX0, TEXCLUT))
+	{
+		return false;
+	}
+
+	m_prevTEX0 = TEX0; // remembered for the next IsCLUTDirty comparison
+	m_prevTEXCLUT = TEXCLUT;
+
+	m_fCLUTMayBeDirty = false;
+
+	DWORD bp = TEX0.CBP;
+	DWORD bw = TEX0.CSM == 0 ? 1 : TEXCLUT.CBW;
+
+	WORD* pCLUT = m_pCLUT + (TEX0.CSA<<4); // destination starts CSA*16 WORDs into the buffer
+
+	// NOTE: TEX0.CPSM == PSM_PSMCT24 is non-standard, KH uses it
+
+	if(TEX0.CSM == 0)
+	{
+		if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S)
+		{
+			WORD* vm = &m_vm16[TEX0.CPSM == PSM_PSMCT16 ? blockAddress16(0, 0, bp, bw) : blockAddress16S(0, 0, bp, bw)];
+
+			if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)
+			{
+				WriteCLUT_T16_I8_CSM1(vm, pCLUT);
+			}
+			else if(TEX0.PSM == PSM_PSMT4HH || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4)
+			{
+				WriteCLUT_T16_I4_CSM1(vm, pCLUT);
+			}
+		}
+		else if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
+		{
+			DWORD* vm = &m_vm32[blockAddress32(0, 0, bp, bw)];
+
+			if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)
+			{
+				WriteCLUT_T32_I8_CSM1(vm, pCLUT);
+			}
+			else if(TEX0.PSM == PSM_PSMT4HH || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4)
+			{
+				WriteCLUT_T32_I4_CSM1(vm, pCLUT);
+			}
+		}
+	}
+	else
+	{
+		readPixel rp = m_psm[TEX0.CPSM].rp; // CSM != 0: entries are read one pixel at a time starting at (COU*16, COV)
+
+		int nPaletteEntries = m_psm[TEX0.PSM].pal;
+
+		ASSERT(nPaletteEntries == 0 || TEX0.CPSM == PSM_PSMCT16); // this is the only allowed format for CSM2, but we implement all of them, just in case...
+
+		if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S)
+		{
+			for(int i = 0; i < nPaletteEntries; i++)
+			{
+				pCLUT[i] = (WORD)(this->*rp)((TEXCLUT.COU<<4) + i, TEXCLUT.COV, bp, bw);
+			}
+		}
+		else if(TEX0.CPSM == PSM_PSMCT32 || TEX0.CPSM == PSM_PSMCT24)
+		{
+			for(int i = 0; i < nPaletteEntries; i++)
+			{
+				DWORD dw = (this->*rp)((TEXCLUT.COU<<4) + i, TEXCLUT.COV, bp, bw);
+				pCLUT[i] = (WORD)(dw & 0xffff); // low halves go at [i], high halves 256 WORDs further on
+				pCLUT[i+256] = (WORD)(dw >> 16);
+			}
+		}
+	}
+
+	return true;
+}
+
+//
+
+void GSLocalMemory::ReadCLUT(GIFRegTEX0 TEX0, DWORD* pCLUT32)
+{
+	ASSERT(pCLUT32);
+
+	// Copies the CLUT starting CSA*16 WORDs into m_pCLUT out to pCLUT32, using the helper that
+	// matches the CLUT format (CPSM) and the texture's index width (PSM); other combinations do nothing.
+	WORD* pSrc = m_pCLUT + (TEX0.CSA << 4);
+
+	const bool fClut32 = TEX0.CPSM == PSM_PSMCT32;
+	const bool fClut16 = TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S;
+
+	const bool fIndex8 = TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H;
+	const bool fIndex4 = TEX0.PSM == PSM_PSMT4 || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4HH;
+
+	if(fClut32)
+	{
+		if(fIndex8)
+		{
+			ReadCLUT32_T32_I8(pSrc, pCLUT32);
+		}
+		else if(fIndex4)
+		{
+			ReadCLUT32_T32_I4(pSrc, pCLUT32);
+		}
+	}
+	else if(fClut16)
+	{
+		if(fIndex8)
+		{
+			ReadCLUT32_T16_I8(pSrc, pCLUT32);
+		}
+		else if(fIndex4)
+		{
+			ReadCLUT32_T16_I4(pSrc, pCLUT32);
+		}
+	}
+}
+
+void GSLocalMemory::SetupCLUT(GIFRegTEX0 TEX0)
+{
+	// TODO: cache m_pCLUT*
+
+	ReadCLUT(TEX0, m_pCLUT32);
+
+	// only the 4-bit index formats need the paired table built below
+	if(TEX0.PSM != PSM_PSMT4 && TEX0.PSM != PSM_PSMT4HL && TEX0.PSM != PSM_PSMT4HH)
+	{
+		return;
+	}
+
+	// m_pCLUT64[k] combines entry (k >> 4) in the upper part with entry (k & 15) in the lower part:
+	// the upper entry is shifted by 32 bits for a PSMCT32 CLUT, otherwise by 16 bits with the
+	// lower entry masked to 16 bits.
+	// sse2?
+
+	const bool fWide = TEX0.CPSM == PSM_PSMCT32;
+
+	for(int k = 0; k < 256; k++)
+	{
+		const UINT64 hi = m_pCLUT32[k >> 4];
+		const DWORD lo = m_pCLUT32[k & 15];
+
+		m_pCLUT64[k] = fWide ? ((hi << 32) | lo) : ((hi << 16) | (lo & 0xffff));
+	}
+}
+
+//
+
+void GSLocalMemory::ReadCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, DWORD* pCLUT32)
+{
+	ASSERT(pCLUT32);
+
+	// Like ReadCLUT, except that a 16-bit CLUT goes through Expand16 with TEXA;
+	// any CPSM other than PSMCT32 / PSMCT16 / PSMCT16S leaves pCLUT32 untouched.
+	WORD* pSrc = m_pCLUT + (TEX0.CSA << 4);
+
+	if(TEX0.CPSM != PSM_PSMCT32)
+	{
+		if(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S)
+		{
+			Expand16(pSrc, pCLUT32, m_psm[TEX0.PSM].pal, &TEXA);
+		}
+		return;
+	}
+
+	if(TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT8H)
+	{
+		ReadCLUT32_T32_I8(pSrc, pCLUT32);
+	}
+	else if(TEX0.PSM == PSM_PSMT4 || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4HH)
+	{
+		ReadCLUT32_T32_I4(pSrc, pCLUT32);
+	}
+}
+
+void GSLocalMemory::SetupCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA)
+{
+	// TODO: cache m_pCLUT*
+
+	ReadCLUT32(TEX0, TEXA, m_pCLUT32);
+
+	const bool fIndex4 = TEX0.PSM == PSM_PSMT4 || TEX0.PSM == PSM_PSMT4HL || TEX0.PSM == PSM_PSMT4HH;
+
+	if(!fIndex4) return;
+
+	// sse2?
+	// 4-bit formats only: m_pCLUT64[k] = entry (k >> 4) in the high DWORD, entry (k & 15) in the low DWORD
+
+	for(int k = 0; k < 256; k++)
+	{
+		m_pCLUT64[k] = ((UINT64)m_pCLUT32[k >> 4] << 32) | m_pCLUT32[k & 15];
+	}
+}
+
+void GSLocalMemory::CopyCLUT32(DWORD* pCLUT32, int nPaletteEntries)
+{
+	memcpy(pCLUT32, m_pCLUT32, nPaletteEntries * sizeof(*pCLUT32)); // copies the first nPaletteEntries DWORDs of m_pCLUT32
+}
+
+////////////////////
+
+static void SwizzleTextureStep(int& tx, int& ty, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	// Moves the write cursor one pixel to the right; when it reaches TRXREG.RRW
+	// it wraps back to TRXPOS.DSAX and drops down one row.
+
+	tx++;
+	if(tx != TRXREG.RRW) return;
+	tx = TRXPOS.DSAX;
+	ty++;
+}
+
+#define IsTopLeftAligned(dsax, tx, ty, bw, bh) /* true when tx == dsax, both are multiples of bw, and ty is a multiple of bh (bw, bh powers of two) */ \
+	(((dsax) & ((bw)-1)) == 0 && ((tx) & ((bw)-1)) == 0 && (dsax) == (tx) && ((ty) & ((bh)-1)) == 0)
+
+void GSLocalMemory::SwizzleTexture32(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) // writes len bytes of 32-bit pixels from src at the (tx, ty) cursor and advances the cursor
+{
+	if(TRXREG.RRW == 0) return;
+
+	int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*4; // tw is the exclusive right edge of the x loops; srcpitch = bytes per source row. NOTE(review): srcpitch is 0 when RRW == DSAX, which would divide by zero below -- confirm callers exclude it
+	int th = len / srcpitch; // whole source rows contained in len
+
+	bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8);
+
+	if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) // anything not made of whole, aligned 8x8 blocks takes this path
+	{
+		if(fTopLeftAligned && tw >= 8 && th >= 8) // first write as many complete 8-row bands as possible
+		{
+			int twa = tw & ~7;
+			int tha = th & ~7;
+
+			len -= tha * srcpitch;
+			th -= tha;
+
+			for(int j = 0; j < tha; j += 8)
+			{
+				for(int x = tx; x < twa; x += 8)
+					SwizzleBlock32u((BYTE*)&m_vm32[blockAddress32(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch);
+
+				for(int i = 0; i < 8; i++, ty++, src += srcpitch) // columns right of twa go pixel by pixel; this loop is also what advances ty and src
+					for(int x = twa; x < tw; x++)
+						writePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+			}
+		}
+
+		if(len > 0 && tw >= 8 && th >= 2 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 2)) // then complete 2-row bands
+		{
+			int twa = tw & ~7;
+			int tha = th & ~1;
+
+			len -= tha * srcpitch;
+			th -= tha;
+
+			for(int j = 0; j < tha; j += 2)
+			{
+				for(int x = tx; x < twa; x += 8)
+					SwizzleColumn32(ty, (BYTE*)&m_vm32[blockAddress32(x, ty&~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch);
+
+				for(int i = 0; i < 2; i++, ty++, src += srcpitch)
+					for(int x = twa; x < tw; x++)
+						writePixel32(x, ty, ((DWORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+			}
+		}
+
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // whatever is left is handed to SwizzleTextureX
+	}
+	else
+	{
+		th += ty; // from here on th is the end row
+
+		if((DWORD_PTR)src & 0xf) // src is not 16-byte aligned: use the 'u' variant
+		{
+			for(int y = ty; y < th; y += 8, src += srcpitch*8)
+				for(int x = tx; x < tw; x += 8)
+					SwizzleBlock32u((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch);
+		}
+		else
+		{
+			for(int y = ty; y < th; y += 8, src += srcpitch*8)
+				for(int x = tx; x < tw; x += 8)
+					SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*4, srcpitch);
+		}
+
+		ty = th;
+	}
+}
+
+void GSLocalMemory::SwizzleTexture24(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) // writes len bytes of packed 3-byte pixels from src at the (tx, ty) cursor and advances the cursor
+{
+	if(TRXREG.RRW == 0) return;
+
+	int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*3; // 3 source bytes per pixel. NOTE(review): srcpitch is 0 when RRW == DSAX (division below) -- confirm callers exclude it
+	int th = len / srcpitch;
+
+	bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8);
+
+	if(!fTopLeftAligned || (tw & 7) || (th & 7) || (len % srcpitch)) // not whole aligned 8x8 blocks: everything goes to SwizzleTextureX
+	{
+		// TODO
+
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG);
+	}
+	else
+	{
+		__declspec(align(16)) DWORD block[8*8]; // staging buffer for one 8x8 block expanded to DWORDs
+
+		th += ty;
+
+		for(int y = ty; y < th; y += 8, src += srcpitch*8)
+		{
+			for(int x = tx; x < tw; x += 8)
+			{
+				BYTE* s = src + (x - tx)*3;
+				DWORD* d = block;
+
+				for(int j = 0, diff = srcpitch - 8*3; j < 8; j++, s += diff, d += 8) // diff skips from the end of this block row to the start of the next source row
+					for(int i = 0; i < 8; i++, s += 3)
+						d[i] = (s[2]<<16)|(s[1]<<8)|s[0];
+
+				SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0x00ffffff); // sizeof(block)/8 = 32 bytes per staged row; the last argument is presumably a write mask -- TODO confirm
+			}
+		}
+
+		ty = th;
+	}
+}
+
+void GSLocalMemory::SwizzleTexture16(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG) // writes len bytes of 16-bit pixels from src at the (tx, ty) cursor and advances the cursor
+{
+	if(TRXREG.RRW == 0) return;
+
+	int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*2; // 2 source bytes per pixel. NOTE(review): srcpitch is 0 when RRW == DSAX (division below) -- confirm callers exclude it
+	int th = len / srcpitch;
+
+	bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 8);
+
+	if(!fTopLeftAligned || (tw & 15) || (th & 7) || (len % srcpitch)) // anything not made of whole, aligned 16x8 blocks takes this path
+	{
+		if(fTopLeftAligned && tw >= 16 && th >= 8) // first write as many complete 8-row bands as possible
+		{
+			int twa = tw & ~15;
+			int tha = th & ~7;
+
+			len -= tha * srcpitch;
+			th -= tha;
+
+			for(int j = 0; j < tha; j += 8)
+			{
+				for(int x = tx; x < twa; x += 16)
+					SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+
+				for(int i = 0; i < 8; i++, ty++, src += srcpitch) // columns right of twa go pixel by pixel; this loop is also what advances ty and src
+					for(int x = twa; x < tw; x++)
+						writePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+			}
+		}
+
+		if(len > 0 && tw >= 16 && th >= 2 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 2)) // then complete 2-row bands
+		{
+			int twa = tw & ~15;
+			int tha = th & ~1;
+
+			len -= tha * srcpitch;
+			th -= tha;
+
+			for(int j = 0; j < tha; j += 2)
+			{
+				for(int x = tx; x < twa; x += 16)
+					SwizzleColumn16(ty, (BYTE*)&m_vm16[blockAddress16(x, ty&~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+
+				for(int i = 0; i < 2; i++, ty++, src += srcpitch)
+					for(int x = twa; x < tw; x++)
+						writePixel16(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+			}
+		}
+
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // whatever is left is handed to SwizzleTextureX
+	}
+	else
+	{
+		th += ty; // from here on th is the end row
+
+		if((DWORD_PTR)src & 0xf) // src is not 16-byte aligned: use the 'u' variant
+		{
+			for(int y = ty; y < th; y += 8, src += srcpitch*8)
+				for(int x = tx; x < tw; x += 16)
+					SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+		}
+		else
+		{
+			for(int y = ty; y < th; y += 8, src += srcpitch*8)
+				for(int x = tx; x < tw; x += 16)
+					SwizzleBlock16((BYTE*)&m_vm16[blockAddress16(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+		}
+
+		ty = th;
+	}
+}
+
+void GSLocalMemory::SwizzleTexture16S(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	if(TRXREG.RRW == 0) return;
+	// Writes len bytes of 2-byte pixels from src into m_vm16 (blockAddress16S / writePixel16S), advancing ty as rows are consumed.
+	int tw = TRXREG.RRW, srcpitch = (TRXREG.RRW - TRXPOS.DSAX)*2; // tw: exclusive right edge; srcpitch: bytes per source row
+	int th = len / srcpitch; // complete source rows available in this call
+	// the block paths below are only taken when IsTopLeftAligned accepts the current position for a 16x8 block
+	bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 8);
+
+	if(!fTopLeftAligned || (tw & 15) || (th & 7) || (len % srcpitch))
+	{
+		if(fTopLeftAligned && tw >= 16 && th >= 8)
+		{
+			int twa = tw & ~15;
+			int tha = th & ~7;
+			// stage 1: as many whole 16x8 blocks as fit; len/th keep track of what is left for the later stages
+			len -= tha * srcpitch;
+			th -= tha;
+
+			for(int j = 0; j < tha; j += 8)
+			{
+				for(int x = tx; x < twa; x += 16)
+					SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16S(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+				// pixels right of the last whole block are written one at a time; this loop also advances ty and src
+				for(int i = 0; i < 8; i++, ty++, src += srcpitch)
+					for(int x = twa; x < tw; x++)
+						writePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+			}
+		}
+		// stage 2: 2-row columns inside the block that contains ty (hence ty&~7 in the address)
+		if(len > 0 && tw >= 16 && th >= 2 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 2))
+		{
+			int twa = tw & ~15;
+			int tha = th & ~1;
+
+			len -= tha * srcpitch;
+			th -= tha;
+
+			for(int j = 0; j < tha; j += 2)
+			{
+				for(int x = tx; x < twa; x += 16)
+					SwizzleColumn16(ty, (BYTE*)&m_vm16[blockAddress16S(x, ty&~7, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+
+				for(int i = 0; i < 2; i++, ty++, src += srcpitch)
+					for(int x = twa; x < tw; x++)
+						writePixel16S(x, ty, ((WORD*)src)[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+			}
+		}
+		// stage 3: remaining bytes go through the per-pixel writer
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG);
+	}
+	else
+	{
+		th += ty; // th becomes the exclusive end row
+		// FIX: src not 16-byte aligned -> SwizzleBlock16u, as SwizzleTexture16 does; this branch used to call SwizzleBlock16 like the aligned one
+		if((DWORD_PTR)src & 0xf)
+		{
+			for(int y = ty; y < th; y += 8, src += srcpitch*8)
+				for(int x = tx; x < tw; x += 16)
+					SwizzleBlock16u((BYTE*)&m_vm16[blockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+		}
+		else
+		{
+			for(int y = ty; y < th; y += 8, src += srcpitch*8)
+				for(int x = tx; x < tw; x += 16)
+					SwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx)*2, srcpitch);
+		}
+
+		ty = th;
+	}
+}
+
+void GSLocalMemory::SwizzleTexture8(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	// Writes 1-byte source pixels into m_vm8 (blockAddress8 / writePixel8). Input that is entirely 16x16-aligned is
+	// swizzled block by block; otherwise whole blocks, then 4-row columns, then SwizzleTextureX consume what they can.
+	if(TRXREG.RRW == 0) return;
+
+	const int xEnd = TRXREG.RRW;
+	const int srcpitch = TRXREG.RRW - TRXPOS.DSAX; // bytes per source row
+	int rows = len / srcpitch;
+
+	const bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 16);
+
+	if(fTopLeftAligned && !(xEnd & 15) && !(rows & 15) && !(len % srcpitch))
+	{
+		const int yEnd = ty + rows;
+		const bool unalignedSrc = ((DWORD_PTR)src & 0xf) != 0; // tested once, before src starts moving
+
+		for(int y = ty; y < yEnd; y += 16, src += srcpitch*16)
+		{
+			for(int x = tx; x < xEnd; x += 16)
+			{
+				BYTE* dstBlock = (BYTE*)&m_vm8[blockAddress8(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)];
+				if(unalignedSrc) SwizzleBlock8u(dstBlock, src + (x - tx), srcpitch);
+				else SwizzleBlock8(dstBlock, src + (x - tx), srcpitch);
+			}
+		}
+
+		ty = yEnd;
+		return;
+	}
+
+	if(fTopLeftAligned && xEnd >= 16 && rows >= 16)
+	{
+		const int xAligned = xEnd & ~15;
+		const int blockRows = rows & ~15;
+
+		len -= blockRows * srcpitch;
+		rows -= blockRows;
+
+		for(int done = 0; done < blockRows; done += 16)
+		{
+			for(int x = tx; x < xAligned; x += 16)
+				SwizzleBlock8u((BYTE*)&m_vm8[blockAddress8(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
+
+			for(int row = 0; row < 16; row++, ty++, src += srcpitch) // pixels right of the last whole block, one at a time
+				for(int x = xAligned; x < xEnd; x++)
+					writePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+		}
+	}
+
+	if(len > 0 && xEnd >= 16 && rows >= 4 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 16, 4))
+	{
+		const int xAligned = xEnd & ~15;
+		const int columnRows = rows & ~3;
+
+		len -= columnRows * srcpitch;
+		rows -= columnRows;
+
+		for(int done = 0; done < columnRows; done += 4)
+		{
+			for(int x = tx; x < xAligned; x += 16)
+				SwizzleColumn8(ty, (BYTE*)&m_vm8[blockAddress8(x, ty&~15, BITBLTBUF.DBP, BITBLTBUF.DBW)], src + (x - tx), srcpitch);
+
+			for(int row = 0; row < 4; row++, ty++, src += srcpitch)
+				for(int x = xAligned; x < xEnd; x++)
+					writePixel8(x, ty, src[x - tx], BITBLTBUF.DBP, BITBLTBUF.DBW);
+		}
+	}
+
+	SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // remaining bytes, pixel by pixel
+}
+
+void GSLocalMemory::SwizzleTexture8H(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	// Moves each source byte into bits 24..31 of a DWORD and stores 8x8 blocks through SwizzleBlock32 with the
+	// mask 0xff000000; input that is not made of whole, aligned 8x8 blocks is handed to SwizzleTextureX.
+	if(TRXREG.RRW == 0) return;
+
+	const int xEnd = TRXREG.RRW;
+	const int srcpitch = TRXREG.RRW - TRXPOS.DSAX; // bytes per source row
+	const int rows = len / srcpitch;
+
+	const bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8);
+	const bool wholeBlocks = fTopLeftAligned && !(xEnd & 7) && !(rows & 7) && !(len % srcpitch);
+
+	if(!wholeBlocks)
+	{
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // TODO (kept from the original): no fast path for partial blocks
+		return;
+	}
+
+	__declspec(align(16)) DWORD block[8*8];
+
+	const int yEnd = ty + rows;
+
+	for(int y = ty; y < yEnd; y += 8, src += srcpitch*8)
+	{
+		for(int x = tx; x < xEnd; x += 8)
+		{
+			const BYTE* in = src + (x - tx);
+
+			for(int row = 0; row < 8; row++, in += srcpitch)
+				for(int col = 0; col < 8; col++)
+					block[row*8 + col] = in[col] << 24;
+
+			SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0xff000000);
+		}
+	}
+
+	ty = yEnd;
+}
+
+void GSLocalMemory::SwizzleTexture4(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	// Writes 4-bit source pixels (two per byte, low nibble first) into m_vm8 via blockAddress4 / writePixel4. Fully 32x16-aligned
+	// input is swizzled block by block; otherwise whole blocks, then 4-row columns, then SwizzleTextureX consume what they can.
+	if(TRXREG.RRW == 0) return;
+
+	const int xEnd = TRXREG.RRW;
+	const int srcpitch = (TRXREG.RRW - TRXPOS.DSAX)/2; // bytes per source row
+	int rows = len / srcpitch;
+
+	const bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 32, 16);
+
+	if(fTopLeftAligned && !(xEnd & 31) && !(rows & 15) && !(len % srcpitch))
+	{
+		const int yEnd = ty + rows;
+		const bool unalignedSrc = ((DWORD_PTR)src & 0xf) != 0; // tested once, before src starts moving
+
+		for(int y = ty; y < yEnd; y += 16, src += srcpitch*16)
+		{
+			for(int x = tx; x < xEnd; x += 32)
+			{
+				BYTE* dstBlock = (BYTE*)&m_vm8[blockAddress4(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1];
+				if(unalignedSrc) SwizzleBlock4u(dstBlock, src + (x - tx)/2, srcpitch);
+				else SwizzleBlock4(dstBlock, src + (x - tx)/2, srcpitch);
+			}
+		}
+
+		ty = yEnd;
+		return;
+	}
+
+	if(fTopLeftAligned && xEnd >= 32 && rows >= 16)
+	{
+		const int xAligned = xEnd & ~31;
+		const int blockRows = rows & ~15;
+
+		len -= blockRows * srcpitch;
+		rows -= blockRows;
+
+		for(int done = 0; done < blockRows; done += 16)
+		{
+			for(int x = tx; x < xAligned; x += 32)
+				SwizzleBlock4u((BYTE*)&m_vm8[blockAddress4(x, ty, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1], src + (x - tx)/2, srcpitch);
+
+			for(int row = 0; row < 16; row++, ty++, src += srcpitch)
+			{
+				const BYTE* packed = src + (xAligned - tx)/2; // first byte right of the last whole block
+				for(int x = xAligned; x < xEnd; x += 2)
+				{
+					const BYTE pair = *packed++;
+					writePixel4(x, ty, pair&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW);
+					writePixel4(x+1, ty, pair>>4, BITBLTBUF.DBP, BITBLTBUF.DBW);
+				}
+			}
+		}
+	}
+
+	if(len > 0 && xEnd >= 32 && rows >= 4 && IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 32, 4))
+	{
+		const int xAligned = xEnd & ~31;
+		const int columnRows = rows & ~3;
+
+		len -= columnRows * srcpitch;
+		rows -= columnRows;
+
+		for(int done = 0; done < columnRows; done += 4)
+		{
+			for(int x = tx; x < xAligned; x += 32)
+				SwizzleColumn4(ty, (BYTE*)&m_vm8[blockAddress4(x, ty&~15, BITBLTBUF.DBP, BITBLTBUF.DBW)>>1], src + (x - tx)/2, srcpitch);
+
+			for(int row = 0; row < 4; row++, ty++, src += srcpitch)
+			{
+				const BYTE* packed = src + (xAligned - tx)/2;
+				for(int x = xAligned; x < xEnd; x += 2)
+				{
+					const BYTE pair = *packed++;
+					writePixel4(x, ty, pair&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW);
+					writePixel4(x+1, ty, pair>>4, BITBLTBUF.DBP, BITBLTBUF.DBW);
+				}
+			}
+		}
+	}
+
+	SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // remaining bytes, pixel by pixel
+}
+
+void GSLocalMemory::SwizzleTexture4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	// Spreads each source byte (two 4-bit pixels, low nibble first) over two DWORDs with the nibble in bits 24..27, then stores
+	// 8x8 blocks through SwizzleBlock32 with the mask 0x0f000000; anything else is handed to SwizzleTextureX.
+	if(TRXREG.RRW == 0) return;
+
+	const int xEnd = TRXREG.RRW;
+	const int srcpitch = (TRXREG.RRW - TRXPOS.DSAX)/2; // bytes per source row
+	const int rows = len / srcpitch;
+
+	const bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8);
+	const bool wholeBlocks = fTopLeftAligned && !(xEnd & 7) && !(rows & 7) && !(len % srcpitch);
+
+	if(!wholeBlocks)
+	{
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // TODO (kept from the original): no fast path for partial blocks
+		return;
+	}
+
+	__declspec(align(16)) DWORD block[8*8];
+
+	const int yEnd = ty + rows;
+
+	for(int y = ty; y < yEnd; y += 8, src += srcpitch*8)
+	{
+		for(int x = tx; x < xEnd; x += 8)
+		{
+			const BYTE* in = src + (x - tx)/2;
+
+			for(int row = 0; row < 8; row++, in += srcpitch)
+				for(int pair = 0; pair < 8/2; pair++)
+					block[row*8 + pair*2] = (in[pair]&0x0f) << 24,
+					block[row*8 + pair*2 + 1] = (in[pair]&0xf0) << 20;
+
+			SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0x0f000000);
+		}
+	}
+
+	ty = yEnd;
+}
+
+void GSLocalMemory::SwizzleTexture4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	// Spreads each source byte (two 4-bit pixels, low nibble first) over two DWORDs with the nibble in bits 28..31, then stores
+	// 8x8 blocks through SwizzleBlock32 with the mask 0xf0000000; anything else is handed to SwizzleTextureX.
+	if(TRXREG.RRW == 0) return;
+
+	const int xEnd = TRXREG.RRW;
+	const int srcpitch = (TRXREG.RRW - TRXPOS.DSAX)/2; // bytes per source row
+	const int rows = len / srcpitch;
+
+	const bool fTopLeftAligned = IsTopLeftAligned(TRXPOS.DSAX, tx, ty, 8, 8);
+	const bool wholeBlocks = fTopLeftAligned && !(xEnd & 7) && !(rows & 7) && !(len % srcpitch);
+
+	if(!wholeBlocks)
+	{
+		SwizzleTextureX(tx, ty, src, len, BITBLTBUF, TRXPOS, TRXREG); // TODO (kept from the original): no fast path for partial blocks
+		return;
+	}
+
+	__declspec(align(16)) DWORD block[8*8];
+
+	const int yEnd = ty + rows;
+
+	for(int y = ty; y < yEnd; y += 8, src += srcpitch*8)
+	{
+		for(int x = tx; x < xEnd; x += 8)
+		{
+			const BYTE* in = src + (x - tx)/2;
+
+			for(int row = 0; row < 8; row++, in += srcpitch)
+				for(int pair = 0; pair < 8/2; pair++)
+					block[row*8 + pair*2] = (in[pair]&0x0f) << 28,
+					block[row*8 + pair*2 + 1] = (in[pair]&0xf0) << 24;
+
+			SwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, BITBLTBUF.DBP, BITBLTBUF.DBW)], (BYTE*)block, sizeof(block)/8, 0xf0000000);
+		}
+	}
+
+	ty = yEnd;
+}
+
+void GSLocalMemory::SwizzleTextureX(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG)
+{
+	if(len <= 0) return;
+	// Per-pixel fallback: stores len bytes of src with the writePixel* routine selected by BITBLTBUF.DPSM, moving tx/ty with SwizzleTextureStep.
+	BYTE* pb = (BYTE*)src; // byte view of the source (8-, 4- and 24-bit formats)
+	WORD* pw = (WORD*)src; // 16-bit view
+	DWORD* pd = (DWORD*)src; // 32-bit view
+	// A DPSM value without a case below writes nothing and leaves tx/ty untouched.
+	// if(ty >= (int)TRXREG.RRH) {ASSERT(0); return;}
+	// 4-bit formats carry two pixels per byte (low nibble first): they write at tx and tx+1 and step twice per byte.
+	switch(BITBLTBUF.DPSM)
+	{
+	case PSM_PSMCT32:
+		for(len /= 4; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pd++)
+			writePixel32(tx, ty, *pd, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMCT24:
+		for(len /= 3; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb+=3) // FIX: built bytewise; a DWORD load read one byte past the last pixel
+			writePixel24(tx, ty, pb[0] | (pb[1] << 8) | (pb[2] << 16), BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMCT16:
+		for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++)
+			writePixel16(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMCT16S:
+		for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++)
+			writePixel16S(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMT8:
+		for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++)
+			writePixel8(tx, ty, *pb, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMT4:
+		for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++)
+			writePixel4(tx, ty, *pb&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
+			writePixel4(tx+1, ty, *pb>>4, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMT8H:
+		for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++)
+			writePixel8H(tx, ty, *pb, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMT4HL:
+		for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++)
+			writePixel4HL(tx, ty, *pb&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
+			writePixel4HL(tx+1, ty, *pb>>4, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMT4HH:
+		for(; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb++)
+			writePixel4HH(tx, ty, *pb&0xf, BITBLTBUF.DBP, BITBLTBUF.DBW),
+			writePixel4HH(tx+1, ty, *pb>>4, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMZ32:
+		for(len /= 4; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pd++)
+			writePixel32Z(tx, ty, *pd, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMZ24:
+		for(len /= 3; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pb+=3) // FIX: same bytewise assembly as PSM_PSMCT24, no over-read
+			writePixel24Z(tx, ty, pb[0] | (pb[1] << 8) | (pb[2] << 16), BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMZ16:
+		for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++)
+			writePixel16Z(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	case PSM_PSMZ16S:
+		for(len /= 2; len-- > 0; SwizzleTextureStep(tx, ty, TRXPOS, TRXREG), pw++)
+			writePixel16SZ(tx, ty, *pw, BITBLTBUF.DBP, BITBLTBUF.DBW);
+		break;
+	}
+}
+
+///////////////////
+
+void GSLocalMemory::unSwizzleTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Copies rect r out of m_vm32 (blocks located with blockAddress32 at TEX0.TBP0/TBW) via unSwizzleBlock32; TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 8, 8, 32) // macro defined outside this view; presumably walks r in 8x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left)*4, dstpitch); // 4 output bytes per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Per 8x8 block: de-swizzle from m_vm32 into a local buffer, then let ExpandBlock24 write the output pixels.
+	FOREACH_BLOCK_START(r, 8, 8, 32) // macro defined outside this view; presumably walks r in 8x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		__declspec(align(16)) DWORD block[8*8]; // 16-byte aligned scratch, 8 DWORDs per row
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); // last argument: scratch row pitch in bytes
+		ExpandBlock24(block, (DWORD*)ptr + (x-r.left), dstpitch, &TEXA); // receives TEXA; presumably supplies the missing alpha - verify in ExpandBlock24
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Per 16x8 block: de-swizzle from m_vm16 (blockAddress16) into a local buffer, then let ExpandBlock16 write DWORD output pixels.
+	FOREACH_BLOCK_START(r, 16, 8, 16) // macro defined outside this view; presumably walks r in 16x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		__declspec(align(16)) WORD block[16*8]; // 16-byte aligned scratch, 16 WORDs per row
+		unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); // last argument: scratch row pitch in bytes
+		ExpandBlock16(block, (DWORD*)ptr + (x-r.left), dstpitch, &TEXA); // receives TEXA; presumably used for the 16->32 bit conversion - verify
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Per 16x8 block: de-swizzle from m_vm16 (blockAddress16S) into a local buffer, then let ExpandBlock16 write DWORD output pixels.
+	FOREACH_BLOCK_START(r, 16, 8, 16S) // macro defined outside this view; presumably walks r in 16x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		__declspec(align(16)) WORD block[16*8]; // 16-byte aligned scratch, 16 WORDs per row
+		unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)block, sizeof(block)/8); // last argument: scratch row pitch in bytes
+		ExpandBlock16(block, (DWORD*)ptr + (x-r.left), dstpitch, &TEXA); // receives TEXA; presumably used for the 16->32 bit conversion - verify
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 16, 16, 8)
+	{
+		// De-swizzle one 16x16 block of byte indices into a scratch buffer, then look every index up in m_pCLUT32.
+		__declspec(align(16)) BYTE indices[16*16];
+		unSwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)indices, sizeof(indices)/16);
+
+		BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+
+		for(int row = 0; row < 16; row++, outRow += dstpitch)
+			for(int col = 0; col < 16; col++)
+				((DWORD*)outRow)[col] = m_pCLUT32[indices[row*16 + col]];
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 8, 8, 32)
+	{
+		// De-swizzle one 8x8 block of 32-bit words from m_vm32; bits 24..31 of each word index m_pCLUT32.
+		__declspec(align(16)) DWORD words[8*8];
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)words, sizeof(words)/8);
+
+		BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+
+		for(int row = 0; row < 8; row++, outRow += dstpitch)
+			for(int col = 0; col < 8; col++)
+				((DWORD*)outRow)[col] = m_pCLUT32[words[row*8 + col] >> 24];
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 32, 16, 4)
+	{
+		// De-swizzle one 32x16 block (16 packed bytes per row); each packed byte selects one 8-byte m_pCLUT64 entry.
+		__declspec(align(16)) BYTE packed[(32/2)*16];
+		unSwizzleBlock4((BYTE*)&m_vm8[blockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], (BYTE*)packed, sizeof(packed)/16);
+
+		BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+
+		for(int row = 0; row < 16; row++, outRow += dstpitch)
+			for(int col = 0; col < 32/2; col++)
+				((UINT64*)outRow)[col] = m_pCLUT64[packed[row*(32/2) + col]];
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 8, 8, 32)
+	{
+		// De-swizzle one 8x8 block of 32-bit words from m_vm32; bits 24..27 of each word index m_pCLUT32.
+		__declspec(align(16)) DWORD words[8*8];
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)words, sizeof(words)/8);
+
+		BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+
+		for(int row = 0; row < 8; row++, outRow += dstpitch)
+			for(int col = 0; col < 8; col++)
+				((DWORD*)outRow)[col] = m_pCLUT32[(words[row*8 + col] >> 24)&0x0f];
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 8, 8, 32)
+	{
+		// De-swizzle one 8x8 block of 32-bit words from m_vm32; bits 28..31 of each word index m_pCLUT32.
+		__declspec(align(16)) DWORD words[8*8];
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)words, sizeof(words)/8);
+
+		BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+
+		for(int row = 0; row < 8; row++, outRow += dstpitch)
+			for(int col = 0; col < 8; col++)
+				((DWORD*)outRow)[col] = m_pCLUT32[words[row*8 + col] >> 28];
+	}
+	FOREACH_BLOCK_END
+}
+
+///////////////////
+
+void GSLocalMemory::ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
+{
+	// Whole-block rects with neither WMS nor WMT equal to 3 go straight to the format's block reader; the rest use ReadTexture<DWORD>.
+	const unSwizzleTexture st = m_psm[TEX0.PSM].ust;
+	const readTexel rt = m_psm[TEX0.PSM].rt;
+	const CSize bs = m_psm[TEX0.PSM].bs;
+
+	const bool blockShaped = r.Width() >= bs.cx && r.Height() >= bs.cy && !((r.left | r.right) & (bs.cx-1)) && !((r.top | r.bottom) & (bs.cy-1));
+	const bool regionRepeat = CLAMP.WMS == 3 || CLAMP.WMT == 3;
+
+	if(blockShaped && !regionRepeat)
+	{
+		(this->*st)(r, dst, dstpitch, TEX0, TEXA);
+		return;
+	}
+
+	ReadTexture<DWORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+}
+
+///////////////////
+
+void GSLocalMemory::unSwizzleTexture16P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Copies rect r out of m_vm16 (blockAddress16) with unSwizzleBlock16 writing directly to the output; TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 16, 8, 16) // macro defined outside this view; presumably walks r in 16x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left)*2, dstpitch); // 2 output bytes per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture16SP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Copies rect r out of m_vm16 (blockAddress16S) with unSwizzleBlock16 writing directly to the output; TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 16, 8, 16S) // macro defined outside this view; presumably walks r in 16x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock16((BYTE*)&m_vm16[blockAddress16S(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left)*2, dstpitch); // 2 output bytes per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture8P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Copies rect r out of m_vm8 (blockAddress8) with unSwizzleBlock8 writing directly to the output; no CLUT lookup, TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 16, 16, 8) // macro defined outside this view; presumably walks r in 16x16 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); // 1 output byte per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture8HP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Reads rect r from m_vm32 (blockAddress32) through unSwizzleBlock8HP straight into the output; no CLUT lookup, TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 8, 8, 32) // macro defined outside this view; presumably walks r in 8x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock8HP((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); // 1 output byte per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Reads rect r from m_vm8 (blockAddress4, halved to a byte offset) through unSwizzleBlock4P straight into the output; TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 32, 16, 4) // macro defined outside this view; presumably walks r in 32x16 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock4P((BYTE*)&m_vm8[blockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], ptr + (x-r.left), dstpitch); // 1 output byte per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Reads rect r from m_vm32 (blockAddress32) through unSwizzleBlock4HLP straight into the output; no CLUT lookup, TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 8, 8, 32) // macro defined outside this view; presumably walks r in 8x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock4HLP((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); // 1 output byte per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{ // Reads rect r from m_vm32 (blockAddress32) through unSwizzleBlock4HHP straight into the output; no CLUT lookup, TEXA is not referenced.
+	FOREACH_BLOCK_START(r, 8, 8, 32) // macro defined outside this view; presumably walks r in 8x8 steps and supplies x, y and ptr - TODO confirm
+	{
+		unSwizzleBlock4HHP((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], ptr + (x-r.left), dstpitch); // 1 output byte per pixel column
+	}
+	FOREACH_BLOCK_END
+}
+
+///////////////////
+
+void GSLocalMemory::ReadTextureP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
+{
+	// Whole-block rects without WMS/WMT == 3 use the ustP block reader; otherwise ReadTexture<T> runs with T chosen from TEX0.PSM.
+	const unSwizzleTexture st = m_psm[TEX0.PSM].ustP;
+	const readTexel rt = m_psm[TEX0.PSM].rtP;
+	const CSize bs = m_psm[TEX0.PSM].bs;
+
+	const bool blockShaped = r.Width() >= bs.cx && r.Height() >= bs.cy && !((r.left | r.right) & (bs.cx-1)) && !((r.top | r.bottom) & (bs.cy-1));
+	const bool regionRepeat = CLAMP.WMS == 3 || CLAMP.WMT == 3;
+
+	if(blockShaped && !regionRepeat)
+	{
+		(this->*st)(r, dst, dstpitch, TEX0, TEXA);
+		return;
+	}
+
+	switch(TEX0.PSM)
+	{
+	case PSM_PSMCT16:
+	case PSM_PSMCT16S:
+		ReadTexture<WORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+		break;
+	case PSM_PSMT8:
+	case PSM_PSMT8H:
+	case PSM_PSMT4:
+	case PSM_PSMT4HL:
+	case PSM_PSMT4HH:
+		ReadTexture<BYTE>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+		break;
+	default:
+		ASSERT(0); // unlisted format: asserts, then deliberately falls through to the DWORD reader
+	case PSM_PSMCT32:
+	case PSM_PSMCT24:
+		ReadTexture<DWORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+		break;
+	}
+}
+
+///////////////////
+
+void GSLocalMemory::unSwizzleTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 16, 16, 8)
+	{
+		// De-swizzle one 16x16 block of byte indices, then resolve them through m_pCLUT32 as DWORDs when
+		// TEX0.CPSM is PSM_PSMCT32 and as WORDs (low half of the entry) otherwise.
+		__declspec(align(16)) BYTE indices[16*16];
+		unSwizzleBlock8((BYTE*)&m_vm8[blockAddress8(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)indices, sizeof(indices)/16);
+
+		if(TEX0.CPSM != PSM_PSMCT32)
+		{
+			ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
+
+			BYTE* outRow = ptr + (x-r.left)*2; // 2 output bytes per pixel
+			for(int row = 0; row < 16; row++, outRow += dstpitch)
+				for(int col = 0; col < 16; col++)
+					((WORD*)outRow)[col] = (WORD)m_pCLUT32[indices[row*16 + col]];
+		}
+		else
+		{
+			BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+			for(int row = 0; row < 16; row++, outRow += dstpitch)
+				for(int col = 0; col < 16; col++)
+					((DWORD*)outRow)[col] = m_pCLUT32[indices[row*16 + col]];
+		}
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 8, 8, 32)
+	{
+		// De-swizzle one 8x8 block of 32-bit words; bits 24..31 index m_pCLUT32. Output is DWORDs when
+		// TEX0.CPSM is PSM_PSMCT32 and WORDs (low half of the entry) otherwise.
+		__declspec(align(16)) DWORD words[8*8];
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)words, sizeof(words)/8);
+
+		if(TEX0.CPSM != PSM_PSMCT32)
+		{
+			ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
+
+			BYTE* outRow = ptr + (x-r.left)*2; // 2 output bytes per pixel
+			for(int row = 0; row < 8; row++, outRow += dstpitch)
+				for(int col = 0; col < 8; col++)
+					((WORD*)outRow)[col] = (WORD)m_pCLUT32[words[row*8 + col] >> 24];
+		}
+		else
+		{
+			BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+			for(int row = 0; row < 8; row++, outRow += dstpitch)
+				for(int col = 0; col < 8; col++)
+					((DWORD*)outRow)[col] = m_pCLUT32[words[row*8 + col] >> 24];
+		}
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 32, 16, 4)
+	{
+		// De-swizzle one 32x16 block (16 packed bytes per row); each packed byte selects one m_pCLUT64 entry, stored
+		// whole (8 bytes) when TEX0.CPSM is PSM_PSMCT32 and truncated to its low DWORD (4 bytes) otherwise.
+		// NOTE(review): the truncated form is only right if m_pCLUT64 packs two 16-bit colours in its low DWORD for 16-bit CLUTs - confirm.
+		__declspec(align(16)) BYTE packed[(32/2)*16];
+		unSwizzleBlock4((BYTE*)&m_vm8[blockAddress4(x, y, TEX0.TBP0, TEX0.TBW)>>1], (BYTE*)packed, sizeof(packed)/16);
+
+		if(TEX0.CPSM != PSM_PSMCT32)
+		{
+			ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
+
+			BYTE* outRow = ptr + (x-r.left)*2; // 2 output bytes per pixel
+			for(int row = 0; row < 16; row++, outRow += dstpitch)
+				for(int col = 0; col < 32/2; col++)
+					((DWORD*)outRow)[col] = (DWORD)m_pCLUT64[packed[row*(32/2) + col]];
+		}
+		else
+		{
+			BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+			for(int row = 0; row < 16; row++, outRow += dstpitch)
+				for(int col = 0; col < 32/2; col++)
+					((UINT64*)outRow)[col] = m_pCLUT64[packed[row*(32/2) + col]];
+		}
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 8, 8, 32)
+	{
+		// De-swizzle one 8x8 block of 32-bit words; bits 24..27 index m_pCLUT32. Output is DWORDs when
+		// TEX0.CPSM is PSM_PSMCT32 and WORDs (low half of the entry) otherwise.
+		__declspec(align(16)) DWORD words[8*8];
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)words, sizeof(words)/8);
+
+		if(TEX0.CPSM != PSM_PSMCT32)
+		{
+			ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
+
+			BYTE* outRow = ptr + (x-r.left)*2; // 2 output bytes per pixel
+			for(int row = 0; row < 8; row++, outRow += dstpitch)
+				for(int col = 0; col < 8; col++)
+					((WORD*)outRow)[col] = (WORD)m_pCLUT32[(words[row*8 + col] >> 24)&0x0f];
+		}
+		else
+		{
+			BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+			for(int row = 0; row < 8; row++, outRow += dstpitch)
+				for(int col = 0; col < 8; col++)
+					((DWORD*)outRow)[col] = m_pCLUT32[(words[row*8 + col] >> 24)&0x0f];
+		}
+	}
+	FOREACH_BLOCK_END
+}
+
+void GSLocalMemory::unSwizzleTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+{
+	FOREACH_BLOCK_START(r, 8, 8, 32)
+	{
+		// De-swizzle one 8x8 block of 32-bit words; bits 28..31 index m_pCLUT32. Output is DWORDs when
+		// TEX0.CPSM is PSM_PSMCT32 and WORDs (low half of the entry) otherwise.
+		__declspec(align(16)) DWORD words[8*8];
+		unSwizzleBlock32((BYTE*)&m_vm32[blockAddress32(x, y, TEX0.TBP0, TEX0.TBW)], (BYTE*)words, sizeof(words)/8);
+
+		if(TEX0.CPSM != PSM_PSMCT32)
+		{
+			ASSERT(TEX0.CPSM == PSM_PSMCT16 || TEX0.CPSM == PSM_PSMCT16S);
+
+			BYTE* outRow = ptr + (x-r.left)*2; // 2 output bytes per pixel
+			for(int row = 0; row < 8; row++, outRow += dstpitch)
+				for(int col = 0; col < 8; col++)
+					((WORD*)outRow)[col] = (WORD)m_pCLUT32[words[row*8 + col] >> 28];
+		}
+		else
+		{
+			BYTE* outRow = ptr + (x-r.left)*4; // 4 output bytes per pixel
+			for(int row = 0; row < 8; row++, outRow += dstpitch)
+				for(int col = 0; col < 8; col++)
+					((DWORD*)outRow)[col] = m_pCLUT32[words[row*8 + col] >> 28];
+		}
+	}
+	FOREACH_BLOCK_END
+}
+
+///////////////////
+
+void GSLocalMemory::ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP)
+{
+	// Whole-block rects without WMS/WMT == 3 use the ustNP block reader; otherwise ReadTexture<T> runs, T picked from TEX0.PSM and, for indexed formats, TEX0.CPSM.
+	const unSwizzleTexture st = m_psm[TEX0.PSM].ustNP;
+	const readTexel rt = m_psm[TEX0.PSM].rtNP;
+	const CSize bs = m_psm[TEX0.PSM].bs;
+
+	const bool blockShaped = r.Width() >= bs.cx && r.Height() >= bs.cy && !((r.left | r.right) & (bs.cx-1)) && !((r.top | r.bottom) & (bs.cy-1));
+	const bool regionRepeat = CLAMP.WMS == 3 || CLAMP.WMT == 3;
+
+	if(blockShaped && !regionRepeat)
+	{
+		(this->*st)(r, dst, dstpitch, TEX0, TEXA);
+		return;
+	}
+
+	switch(TEX0.PSM)
+	{
+	case PSM_PSMCT16:
+	case PSM_PSMCT16S:
+		ReadTexture<WORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+		break;
+	case PSM_PSMT8:
+	case PSM_PSMT8H:
+	case PSM_PSMT4:
+	case PSM_PSMT4HL:
+	case PSM_PSMT4HH:
+		switch(TEX0.CPSM) // indexed formats: the CLUT format decides the output texel size
+		{
+		case PSM_PSMCT16:
+		case PSM_PSMCT16S:
+			ReadTexture<WORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+			break;
+		default:
+			ASSERT(0); // unlisted CLUT format: asserts, then deliberately falls through to the DWORD reader
+		case PSM_PSMCT32:
+			ReadTexture<DWORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+			break;
+		}
+		break;
+	default:
+	case PSM_PSMCT32:
+	case PSM_PSMCT24:
+		ReadTexture<DWORD>(r, dst, dstpitch, TEX0, TEXA, CLAMP, rt, st);
+		break;
+	}
+}
+
+//
+
+template<typename T> 
+void GSLocalMemory::ReadTexture(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP, readTexel rt, unSwizzleTexture st)
+{
+	// Fills dst (rows dstpitch bytes apart, texels of type T) with rect r. CLAMP mode 2 clips r to MINU..MAXU / MINV..MAXV, mode 3 maps a
+	// coordinate c to (c & MIN) | MAX. rt reads one texel, st one block-aligned rect. Not thread safe: the mode 3 path writes m_xtbl/m_ytbl.
+	DWORD wms = CLAMP.WMS, wmt = CLAMP.WMT;
+	DWORD minu = CLAMP.MINU, maxu = CLAMP.MAXU;
+	DWORD minv = CLAMP.MINV, maxv = CLAMP.MAXV;
+
+	if(wms == 2)
+	{
+		r.left = min(r.right, max(r.left, (int)minu));
+		r.right = max(r.left, min(r.right, (int)maxu));
+	}
+
+	if(wmt == 2)
+	{
+		r.top = min(r.bottom, max(r.top, (int)minv));
+		r.bottom = max(r.top, min(r.bottom, (int)maxv));
+	}
+
+	CSize bs = m_psm[TEX0.PSM].bs;
+
+	int bsxm = bs.cx - 1;
+	int bsym = bs.cy - 1;
+	// cr: the part of r made of whole blocks. It comes out inverted (left > right or top > bottom) when r spans no block boundary.
+	CRect cr((r.left + bsxm) & ~bsxm, (r.top + bsym) & ~bsym, r.right & ~bsxm, r.bottom & ~bsym);
+
+	bool aligned = ((DWORD_PTR)(dst + (cr.left - r.left) * sizeof(T)) & 0xf) == 0;
+
+	if(wms == 3 || wmt == 3) // TODO: do region repeat in pixel shader
+	{
+		if(wms == 3 && wmt == 3)
+		{
+			int w = minu + 1;
+			int h = minv + 1;
+
+			w = (w + bsxm) & ~bsxm;
+			h = (h + bsym) & ~bsym;
+
+			if(w % bs.cx == 0 && maxu % bs.cx == 0 && h % bs.cy == 0 && maxv % bs.cy == 0)
+			{
+				// read the w x h tile at (maxu, maxv) once with the block reader, then replicate it across r
+				T* buff = (T*)_aligned_malloc(w * h * sizeof(T), 16);
+
+				if(buff) // FIX: on allocation failure use the table-driven loop below instead of writing through NULL
+				{
+					(this->*st)(CRect(CPoint(maxu, maxv), CSize(w, h)), (BYTE*)buff, w * sizeof(T), TEX0, TEXA);
+
+					dst -= r.left * (int)sizeof(T); // FIX: rebase in T units so ((T*)dst)[r.left] is the first output texel (was a byte offset)
+					int k = (r.right - r.left) >> 2;
+
+					for(int y = r.top; y < r.bottom; y++, dst += dstpitch)
+					{
+						T* src = &buff[(y & minv) * w];
+
+						int x = r.left;
+
+						for(int i = 0; i < k; x += 4, i++)
+						{
+							((T*)dst)[x+0] = src[(x+0) & minu];
+							((T*)dst)[x+1] = src[(x+1) & minu];
+							((T*)dst)[x+2] = src[(x+2) & minu];
+							((T*)dst)[x+3] = src[(x+3) & minu];
+						}
+
+						for(; x < r.right; x++)
+						{
+							((T*)dst)[x] = src[x & minu];
+						}
+					}
+
+					_aligned_free(buff);
+					return;
+				}
+			}
+		}
+
+		switch(wms)
+		{
+		default: for(int x = r.left; x < r.right; x++) m_xtbl[x] = x; break;
+		case 3: for(int x = r.left; x < r.right; x++) m_xtbl[x] = (x & minu) | maxu; break;
+		}
+
+		switch(wmt)
+		{
+		default: for(int y = r.top; y < r.bottom; y++) m_ytbl[y] = y; break;
+		case 3: for(int y = r.top; y < r.bottom; y++) m_ytbl[y] = (y & minv) | maxv; break;
+		}
+
+		// generic region-repeat path: every texel is read through the coordinate tables built above
+
+		for(int y = r.top; y < r.bottom; y++, dst += dstpitch)
+			for(int x = r.left, i = 0; x < r.right; x++, i++)
+				((T*)dst)[i] = (T)(this->*rt)(m_xtbl[x], m_ytbl[y], TEX0, TEXA);
+	}
+	else
+	{
+		if(aligned && cr.left < cr.right && cr.top < cr.bottom) // FIX: an empty or inverted cr made the loops below write outside r
+		{
+			for(int y = r.top; y < cr.top; y++, dst += dstpitch)
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+					((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
+
+			// cr is known to be non-empty here, so the block reader takes the whole interior in one call;
+			// dst already points at row cr.top after the loop above
+			(this->*st)(cr, dst + (cr.left - r.left)*sizeof(T), dstpitch, TEX0, TEXA);
+
+			// left and right fringes of the rows the block reader covered
+			for(int y = cr.top; y < cr.bottom; y++, dst += dstpitch)
+			{
+				for(int x = r.left, i = 0; x < cr.left; x++, i++)
+					((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
+				for(int x = cr.right, i = x - r.left; x < r.right; x++, i++)
+					((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
+			}
+
+			for(int y = cr.bottom; y < r.bottom; y++, dst += dstpitch)
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+					((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
+		}
+		else
+		{
+			// no usable whole-block interior, or the block destination is not 16-byte aligned: read texel by texel
+
+			for(int y = r.top; y < r.bottom; y++, dst += dstpitch)
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+					((T*)dst)[i] = (T)(this->*rt)(x, y, TEX0, TEXA);
+		}
+	}
+}
+
+//
+/*
+HRESULT GSLocalMemory::SaveBMP(ID3D10Device* dev, LPCTSTR fn, DWORD bp, DWORD bw, DWORD psm, int w, int h)
+{
+	D3D10_TEXTURE2D_DESC desc;
+
+	memset(&desc, 0, sizeof(desc));
+
+	desc.Width = w;
+	desc.Height = h;
+	desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+	desc.MipLevels = 1;
+	desc.ArraySize = 1;
+	desc.SampleDesc.Count = 1;
+	desc.Usage = D3D10_USAGE_STAGING;
+	desc.BindFlags = 0;
+	desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE;
+
+	CComPtr<ID3D10Texture2D> texture;
+
+	HRESULT hr = dev->CreateTexture2D(&desc, NULL, &texture);
+
+	D3D10_MAPPED_TEXTURE2D map;
+
+	if(FAILED(hr) || FAILED(texture->Map(0, D3D10_MAP_WRITE, 0, &map)))
+	{
+		return E_FAIL;
+	}
+
+	GIFRegTEX0 TEX0;
+
+	TEX0.TBP0 = bp;
+	TEX0.TBW = bw;
+	TEX0.PSM = psm;
+
+	GIFRegTEXA TEXA;
+
+	TEXA.AEM = 0;
+	TEXA.TA0 = 0;
+	TEXA.TA1 = 0x80;
+
+	// (this->*m_psm[TEX0.PSM].ust)(CRect(0, 0, w, h), (BYTE*)lr.pBits, lr.Pitch, TEX0, TEXA);
+
+	readTexel rt = m_psm[psm].rt;
+
+	BYTE* p = (BYTE*)map.pData;
+
+	for(int j = 0; j < h; j++, p += map.RowPitch)
+		for(int i = 0; i < w; i++)
+			((DWORD*)p)[i] = (this->*rt)(i, j, TEX0, TEXA);
+
+	texture->Unmap(0);
+
+	return D3DX10SaveTextureToFile(texture, D3DX10_IFF_BMP, fn);
+}
+*/
\ No newline at end of file
diff --git a/gsdx/GSLocalMemory.h b/gsdx/GSLocalMemory.h
new file mode 100644
index 0000000..1888c20
--- /dev/null
+++ b/gsdx/GSLocalMemory.h
@@ -0,0 +1,917 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with this Program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#pragma warning(disable: 4100) // warning C4100: 'TEXA' : unreferenced formal parameter
+#pragma warning(disable: 4244) // warning C4244: '=' : conversion from 'const UINT64' to 'int', possible loss of data (really???)
+
+#include "GS.h"
+#include "GSTables.h"
+
+// GSLocalMemory provides accessors for one memory buffer that can be viewed as
+// bytes, 16-bit words or 32-bit words (m_vm8 / m_vm16 / m_vm32 share storage).
+// It offers, per pixel storage format:
+//  - static address helpers that turn (x, y, base pointer, buffer width) into an
+//    index into the buffer,
+//  - inline pixel/texel readers and writers, by index or by coordinate,
+//  - out-of-line texture swizzle/unswizzle and CLUT routines (defined elsewhere).
+class GSLocalMemory
+{
+public:
+	// Function-pointer types stored in the psm_t dispatch entries below.
+	// pixelAddress points to a static function; all others point to members.
+	typedef DWORD (*pixelAddress)(int x, int y, DWORD bp, DWORD bw);
+	typedef void (GSLocalMemory::*writePixel)(int x, int y, DWORD c, DWORD bp, DWORD bw);
+	typedef void (GSLocalMemory::*writeFrame)(int x, int y, DWORD c, DWORD bp, DWORD bw);
+	typedef DWORD (GSLocalMemory::*readPixel)(int x, int y, DWORD bp, DWORD bw);
+	typedef DWORD (GSLocalMemory::*readTexel)(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	typedef void (GSLocalMemory::*writePixelAddr)(DWORD addr, DWORD c);
+	typedef void (GSLocalMemory::*writeFrameAddr)(DWORD addr, DWORD c);
+	typedef DWORD (GSLocalMemory::*readPixelAddr)(DWORD addr);
+	typedef DWORD (GSLocalMemory::*readTexelAddr)(DWORD addr, GIFRegTEXA& TEXA);
+	typedef void (GSLocalMemory::*SwizzleTexture)(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	typedef void (GSLocalMemory::*unSwizzleTexture)(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	typedef void (GSLocalMemory::*readTexture)(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
+
+	// One dispatch entry per pixel storage format: address functions, pixel and
+	// texel accessors, swizzlers and a few format properties.
+	// The union with dummy[128] makes every entry at least 128 bytes large.
+	// NOTE(review): the meaning of bpp/pal/trbpp/bs/pgs and of the P/NP variants is
+	// set up where m_psm is filled in (not in this header) - confirm there.
+	typedef union 
+	{
+		struct
+		{
+			pixelAddress pa, ba, pga;
+			readPixel rp;
+			readPixelAddr rpa;
+			writePixel wp;
+			writePixelAddr wpa;
+			readTexel rt, rtP, rtNP;
+			readTexelAddr rta;
+			writeFrameAddr wfa;
+			SwizzleTexture st;
+			unSwizzleTexture ust, ustP, ustNP;
+			DWORD bpp, pal, trbpp; 
+			CSize bs, pgs;
+			int* rowOffset[8];
+		};
+		BYTE dummy[128];
+	} psm_t;
+
+	// Table of dispatch entries; 64 slots.
+	// NOTE(review): presumably indexed by the PSM value of a register - confirm in the .cpp.
+	static psm_t m_psm[64];
+
+protected:
+	// Precomputed in-page offsets used by the pixelAddressNN() functions below,
+	// indexed as [bp & 0x1f][y within page][x within page].
+	static DWORD pageOffset32[32][32][64];
+	static DWORD pageOffset32Z[32][32][64];
+	static DWORD pageOffset16[32][64][64];
+	static DWORD pageOffset16S[32][64][64];
+	static DWORD pageOffset16Z[32][64][64];
+	static DWORD pageOffset16SZ[32][64][64];
+	static DWORD pageOffset8[32][64][128];
+	static DWORD pageOffset4[32][128][128];
+
+	// NOTE(review): not referenced by the inline code in this header; presumably
+	// per-column offsets used by the row-based routines in the .cpp - confirm.
+	static int rowOffset32[2048];
+	static int rowOffset32Z[2048];
+	static int rowOffset16[2048];
+	static int rowOffset16S[2048];
+	static int rowOffset16Z[2048];
+	static int rowOffset16SZ[2048];
+	static int rowOffset8[2][2048];
+	static int rowOffset4[2][2048];
+
+	// The same buffer pointer seen with 8-, 16- and 32-bit element types; the addr
+	// argument of each accessor is an index in units of the view it uses.
+	union {BYTE* m_vm8; WORD* m_vm16; DWORD* m_vm32;};
+
+	// Palette (CLUT) state. m_pCLUT32 is the table the indexed readTexel*() helpers look up.
+	// NOTE(review): roles of m_CBP, m_pCLUT and m_pCLUT64 are defined in the .cpp - confirm.
+	DWORD m_CBP[2];
+	WORD* m_pCLUT;
+	DWORD* m_pCLUT32;
+	UINT64* m_pCLUT64;
+
+	// NOTE(review): presumably the registers seen by the last CLUT write, used by
+	// IsCLUTDirty()/WriteCLUT() - confirm. m_fCLUTMayBeDirty is set by InvalidateCLUT().
+	GIFRegTEX0 m_prevTEX0;
+	GIFRegTEXCLUT m_prevTEXCLUT;
+	bool m_fCLUTMayBeDirty;
+
+public:
+	GSLocalMemory();
+	virtual ~GSLocalMemory();
+
+	// Returns the buffer as a byte pointer.
+	BYTE* GetVM() 
+	{
+		return m_vm8;
+	}
+
+	// Rounds s down to a multiple of bs per component.
+	// The mask arithmetic is only correct when bs.cx and bs.cy are powers of two.
+	__forceinline static void RoundDown(CSize& s, CSize bs)
+	{
+		s.cx &= ~(bs.cx-1);
+		s.cy &= ~(bs.cy-1);
+	}
+
+	// Rounds s up to a multiple of bs per component (same power-of-two requirement).
+	__forceinline static void RoundUp(CSize& s, CSize bs)
+	{
+		s.cx = (s.cx + (bs.cx-1)) & ~(bs.cx-1);
+		s.cy = (s.cy + (bs.cy-1)) & ~(bs.cy-1);
+	}
+
+	// Keeps the low 24 bits of c and supplies the top byte:
+	// TEXA.TA0, except 0 when TEXA.AEM is set and the low 24 bits are all zero.
+	__forceinline static DWORD Expand24To32(DWORD c, GIFRegTEXA& TEXA)
+	{
+		return (((!TEXA.AEM | (c & 0xffffff)) ? TEXA.TA0 : 0) << 24) | (c & 0xffffff);
+	}
+
+	// Expands a 1:5:5:5 value to 32 bits. Each 5-bit channel lands in the upper
+	// five bits of its byte (low three bits stay zero). Top byte: TEXA.TA1 when
+	// bit 15 is set; otherwise TEXA.TA0, except 0 when TEXA.AEM is set and c == 0.
+	__forceinline static DWORD Expand16To32(WORD c, GIFRegTEXA& TEXA)
+	{
+		return (((c & 0x8000) ? TEXA.TA1 : (!TEXA.AEM | c) ? TEXA.TA0 : 0) << 24) | ((c & 0x7c00) << 9) | ((c & 0x03e0) << 6) | ((c & 0x001f) << 3);
+	}
+
+	// address
+	//
+	// pageAddressNN: index of the first pixel of the page containing (x, y).
+	// bp is divided by 32 (bp >> 5) to get the starting page; for the 8- and
+	// 4-bit formats the page stride is (bw+1)>>1 instead of bw.
+
+	static DWORD pageAddress32(int x, int y, DWORD bp, DWORD bw)
+	{
+		return ((bp >> 5) + (y >> 5) * bw + (x >> 6)) << 11; 
+	}
+
+	static DWORD pageAddress16(int x, int y, DWORD bp, DWORD bw)
+	{
+		return ((bp >> 5) + (y >> 6) * bw + (x >> 6)) << 12;
+	}
+
+	static DWORD pageAddress8(int x, int y, DWORD bp, DWORD bw)
+	{
+		return ((bp >> 5) + (y >> 6) * ((bw+1)>>1) + (x >> 7)) << 13; 
+	}
+
+	static DWORD pageAddress4(int x, int y, DWORD bp, DWORD bw)
+	{
+		return ((bp >> 5) + (y >> 7) * ((bw+1)>>1) + (x >> 7)) << 14;
+	}
+
+	// blockAddressNN: index of the first pixel of the block containing (x, y).
+	// "page" is kept in block units (32 per page, hence the & ~0x1f masks) and the
+	// block number inside the page comes from the blockTableNN lookup tables.
+
+	static DWORD blockAddress32(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f);
+		DWORD block = blockTable32[(y >> 3) & 3][(x >> 3) & 7];
+		return (page + block) << 6;
+	}
+
+	static DWORD blockAddress16(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16[(y >> 3) & 7][(x >> 4) & 3];
+		return (page + block) << 7;
+	}
+
+	static DWORD blockAddress16S(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16S[(y >> 3) & 7][(x >> 4) & 3];
+		return (page + block) << 7;
+	}
+
+	static DWORD blockAddress8(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f); 
+		DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
+		return (page + block) << 8;
+	}
+
+	static DWORD blockAddress4(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw+1)>>1) + ((x >> 2) & ~0x1f); 
+		DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
+		return (page + block) << 9;
+	}
+
+	static DWORD blockAddress32Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable32Z[(y >> 3) & 3][(x >> 3) & 7];
+		return (page + block) << 6;
+	}
+
+	static DWORD blockAddress16Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16Z[(y >> 3) & 7][(x >> 4) & 3];
+		return (page + block) << 7;
+	}
+
+	static DWORD blockAddress16SZ(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3];
+		return (page + block) << 7;
+	}
+
+	// pixelAddressOrgNN: pixel index computed directly from the block and column
+	// tables (the Z variants use a linear in-block offset instead of a column table).
+	// The ASSERTs bound the index so that index * pixel size stays below 4 MiB.
+	// NOTE(review): presumably the reference form that the pageOffset tables are
+	// generated from - confirm in the .cpp.
+
+	static DWORD pixelAddressOrg32(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f);
+		DWORD block = blockTable32[(y >> 3) & 3][(x >> 3) & 7];
+		DWORD word = ((page + block) << 6) + columnTable32[y & 7][x & 7];
+		ASSERT(word < 1024*1024);
+		return word;
+	}
+
+	static DWORD pixelAddressOrg16(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16[(y >> 3) & 7][(x >> 4) & 3];
+		DWORD word = ((page + block) << 7) + columnTable16[y & 7][x & 15];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	static DWORD pixelAddressOrg16S(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16S[(y >> 3) & 7][(x >> 4) & 3];
+		DWORD word = ((page + block) << 7) + columnTable16[y & 7][x & 15];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	// Note: unlike its siblings, the range ASSERT here is commented out.
+	static DWORD pixelAddressOrg8(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f); 
+		DWORD block = blockTable8[(y >> 4) & 3][(x >> 4) & 7];
+		DWORD word = ((page + block) << 8) + columnTable8[y & 15][x & 15];
+	//	ASSERT(word < 1024*1024*4);
+		return word;
+	}
+
+	static DWORD pixelAddressOrg4(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 2) & ~0x1f) * ((bw + 1)>>1) + ((x >> 2) & ~0x1f); 
+		DWORD block = blockTable4[(y >> 4) & 7][(x >> 5) & 3];
+		DWORD word = ((page + block) << 9) + columnTable4[y & 15][x & 31];
+		ASSERT(word < 1024*1024*8);
+		return word;
+	}
+
+	static DWORD pixelAddressOrg32Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + (y & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable32Z[(y >> 3) & 3][(x >> 3) & 7];
+		DWORD word = ((page + block) << 6) + ((y & 7) << 3) + (x & 7);
+		ASSERT(word < 1024*1024);
+		return word;
+	}
+
+	static DWORD pixelAddressOrg16Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16Z[(y >> 3) & 7][(x >> 4) & 3];
+		DWORD word = ((page + block) << 7) + ((y & 7) << 4) + (x & 15);
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	static DWORD pixelAddressOrg16SZ(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = bp + ((y >> 1) & ~0x1f) * bw + ((x >> 1) & ~0x1f); 
+		DWORD block = blockTable16SZ[(y >> 3) & 7][(x >> 4) & 3];
+		DWORD word = ((page + block) << 7) + ((y & 7) << 4) + (x & 15);
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	// pixelAddressNN: table-driven pixel index. The page number is computed
+	// arithmetically and the offset inside the page is read from pageOffsetNN,
+	// selected by the low five bits of bp and the in-page coordinates.
+	// These are the functions the coordinate-based accessors below call.
+
+	static DWORD pixelAddress32(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 5) * bw + (x >> 6); 
+		DWORD word = (page << 11) + pageOffset32[bp & 0x1f][y & 0x1f][x & 0x3f];
+		ASSERT(word < 1024*1024);
+		return word;
+	}
+
+	static DWORD pixelAddress16(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 6) * bw + (x >> 6); 
+		DWORD word = (page << 12) + pageOffset16[bp & 0x1f][y & 0x3f][x & 0x3f];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	static DWORD pixelAddress16S(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 6) * bw + (x >> 6); 
+		DWORD word = (page << 12) + pageOffset16S[bp & 0x1f][y & 0x3f][x & 0x3f];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	static DWORD pixelAddress8(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 6) * ((bw + 1)>>1) + (x >> 7); 
+		DWORD word = (page << 13) + pageOffset8[bp & 0x1f][y & 0x3f][x & 0x7f];
+		ASSERT(word < 1024*1024*4);
+		return word;
+	}
+
+	static DWORD pixelAddress4(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 7) * ((bw + 1)>>1) + (x >> 7);
+		DWORD word = (page << 14) + pageOffset4[bp & 0x1f][y & 0x7f][x & 0x7f];
+		ASSERT(word < 1024*1024*8);
+		return word;
+	}
+
+	static DWORD pixelAddress32Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 5) * bw + (x >> 6); 
+		DWORD word = (page << 11) + pageOffset32Z[bp & 0x1f][y & 0x1f][x & 0x3f];
+		ASSERT(word < 1024*1024);
+		return word;
+	}
+
+	static DWORD pixelAddress16Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 6) * bw + (x >> 6); 
+		DWORD word = (page << 12) + pageOffset16Z[bp & 0x1f][y & 0x3f][x & 0x3f];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	static DWORD pixelAddress16SZ(int x, int y, DWORD bp, DWORD bw)
+	{
+		DWORD page = (bp >> 5) + (y >> 6) * bw + (x >> 6); 
+		DWORD word = (page << 12) + pageOffset16SZ[bp & 0x1f][y & 0x3f][x & 0x3f];
+		ASSERT(word < 1024*1024*2);
+		return word;
+	}
+
+	// pixel R/W
+	//
+	// Readers by index. addr is an index into the view the function uses:
+	// 32-bit words for 32/24/8H/4HL/4HH (and 32Z/24Z), 16-bit words for the
+	// 16-bit formats, bytes for 8, and nibbles for 4 (two per byte).
+
+	__forceinline DWORD readPixel32(DWORD addr) 
+	{
+		return m_vm32[addr];
+	}
+
+	// Low 24 bits only.
+	__forceinline DWORD readPixel24(DWORD addr) 
+	{
+		return m_vm32[addr] & 0x00ffffff;
+	}
+
+	__forceinline DWORD readPixel16(DWORD addr) 
+	{
+		return (DWORD)m_vm16[addr];
+	}
+
+	__forceinline DWORD readPixel16S(DWORD addr) 
+	{
+		return (DWORD)m_vm16[addr];
+	}
+
+	__forceinline DWORD readPixel8(DWORD addr) 
+	{
+		return (DWORD)m_vm8[addr];
+	}
+
+	// Even addr selects the low nibble of byte addr/2, odd addr the high nibble.
+	__forceinline DWORD readPixel4(DWORD addr) 
+	{
+		return (m_vm8[addr>>1] >> ((addr&1) << 2)) & 0x0f;
+	}
+
+	// Top byte (bits 24-31) of the 32-bit word.
+	__forceinline DWORD readPixel8H(DWORD addr) 
+	{
+		return m_vm32[addr] >> 24;
+	}
+
+	// Bits 24-27 of the 32-bit word.
+	__forceinline DWORD readPixel4HL(DWORD addr) 
+	{
+		return (m_vm32[addr] >> 24) & 0x0f;
+	}
+
+	// Bits 28-31 of the 32-bit word.
+	__forceinline DWORD readPixel4HH(DWORD addr) 
+	{
+		return (m_vm32[addr] >> 28) & 0x0f;
+	}
+
+	__forceinline DWORD readPixel32Z(DWORD addr) 
+	{
+		return m_vm32[addr];
+	}
+
+	__forceinline DWORD readPixel24Z(DWORD addr) 
+	{
+		return m_vm32[addr] & 0x00ffffff;
+	}
+
+	__forceinline DWORD readPixel16Z(DWORD addr) 
+	{
+		return (DWORD)m_vm16[addr];
+	}
+
+	__forceinline DWORD readPixel16SZ(DWORD addr) 
+	{
+		return (DWORD)m_vm16[addr];
+	}
+
+	// Readers by coordinate: compute the index with the matching pixelAddressNN()
+	// and forward to the by-index reader. 24, 8H, 4HL and 4HH share the 32-bit layout.
+
+	__forceinline DWORD readPixel32(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel32(pixelAddress32(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel24(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel24(pixelAddress32(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel16(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel16(pixelAddress16(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel16S(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel16S(pixelAddress16S(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel8(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel8(pixelAddress8(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel4(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel4(pixelAddress4(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel8H(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel8H(pixelAddress32(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel4HL(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel4HL(pixelAddress32(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel4HH(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel4HH(pixelAddress32(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel32Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel32Z(pixelAddress32Z(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel24Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel24Z(pixelAddress32Z(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel16Z(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel16Z(pixelAddress16Z(x, y, bp, bw));
+	}
+
+	__forceinline DWORD readPixel16SZ(int x, int y, DWORD bp, DWORD bw)
+	{
+		return readPixel16SZ(pixelAddress16SZ(x, y, bp, bw));
+	}
+
+	// Writers by index. The partial-word formats (24, 4, 8H, 4HL, 4HH, 24Z) are
+	// read-modify-write: only the bits belonging to the format are replaced.
+
+	__forceinline void writePixel32(DWORD addr, DWORD c) 
+	{
+		m_vm32[addr] = c;
+	}
+
+	// Replaces the low 24 bits, keeps the top byte.
+	__forceinline void writePixel24(DWORD addr, DWORD c) 
+	{
+		m_vm32[addr] = (m_vm32[addr] & 0xff000000) | (c & 0x00ffffff);
+	}
+
+	__forceinline void writePixel16(DWORD addr, DWORD c) 
+	{
+		m_vm16[addr] = (WORD)c;
+	}
+
+	__forceinline void writePixel16S(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = (WORD)c;
+	}
+
+	__forceinline void writePixel8(DWORD addr, DWORD c)
+	{
+		m_vm8[addr] = (BYTE)c;
+	}
+
+	// shift is 0 for an even nibble index and 4 for an odd one; 0xf0 >> shift is
+	// the mask of the nibble that must be preserved.
+	__forceinline void writePixel4(DWORD addr, DWORD c) 
+	{
+		int shift = (addr&1) << 2; addr >>= 1; 
+		m_vm8[addr] = (BYTE)((m_vm8[addr] & (0xf0 >> shift)) | ((c & 0x0f) << shift));
+	}
+
+	// Replaces the top byte, keeps the low 24 bits.
+	__forceinline void writePixel8H(DWORD addr, DWORD c)
+	{
+		m_vm32[addr] = (m_vm32[addr] & 0x00ffffff) | (c << 24);
+	}
+
+	// Replaces bits 24-27.
+	__forceinline void writePixel4HL(DWORD addr, DWORD c) 
+	{
+		m_vm32[addr] = (m_vm32[addr] & 0xf0ffffff) | ((c & 0x0f) << 24);
+	}
+
+	// Replaces bits 28-31.
+	__forceinline void writePixel4HH(DWORD addr, DWORD c)
+	{
+		m_vm32[addr] = (m_vm32[addr] & 0x0fffffff) | ((c & 0x0f) << 28);
+	}
+
+	__forceinline void writePixel32Z(DWORD addr, DWORD c)
+	{
+		m_vm32[addr] = c;
+	}
+
+	__forceinline void writePixel24Z(DWORD addr, DWORD c)
+	{
+		m_vm32[addr] = (m_vm32[addr] & 0xff000000) | (c & 0x00ffffff);
+	}
+
+	__forceinline void writePixel16Z(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = (WORD)c;
+	}
+
+	__forceinline void writePixel16SZ(DWORD addr, DWORD c)
+	{
+		m_vm16[addr] = (WORD)c;
+	}
+
+	// writeFrame16/16S take a 32-bit color and pack it to 1:5:5:5 before storing:
+	// bit 31 -> bit 15, and the upper five bits of each low byte -> one 5-bit field.
+
+	__forceinline void writeFrame16(DWORD addr, DWORD c) 
+	{
+		writePixel16(addr, ((c>>16)&0x8000) | ((c>>9)&0x7c00) | ((c>>6)&0x03e0) | ((c>>3)&0x001f));
+	}
+
+	__forceinline void writeFrame16S(DWORD addr, DWORD c) 
+	{
+		writePixel16S(addr, ((c>>16)&0x8000) | ((c>>9)&0x7c00) | ((c>>6)&0x03e0) | ((c>>3)&0x001f));
+	}
+
+	// Writers by coordinate: compute the index with the matching pixelAddressNN()
+	// and forward to the by-index writer.
+
+	__forceinline void writePixel32(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel32(pixelAddress32(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel24(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel24(pixelAddress32(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel16(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel16(pixelAddress16(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel16S(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel16S(pixelAddress16S(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel8(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel8(pixelAddress8(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel4(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel4(pixelAddress4(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel8H(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel8H(pixelAddress32(x, y, bp, bw), c);
+	}
+
+    __forceinline void writePixel4HL(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel4HL(pixelAddress32(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel4HH(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel4HH(pixelAddress32(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel32Z(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel32Z(pixelAddress32Z(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel24Z(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel24Z(pixelAddress32Z(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel16Z(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel16Z(pixelAddress16Z(x, y, bp, bw), c);
+	}
+
+	__forceinline void writePixel16SZ(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writePixel16SZ(pixelAddress16SZ(x, y, bp, bw), c);
+	}
+
+	__forceinline void writeFrame16(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writeFrame16(pixelAddress16(x, y, bp, bw), c);
+	}
+
+	__forceinline void writeFrame16S(int x, int y, DWORD c, DWORD bp, DWORD bw)
+	{
+		writeFrame16S(pixelAddress16S(x, y, bp, bw), c);
+	}
+
+	// Texel readers by index: always return a 32-bit color.
+	// 32 returns the stored word, 24 and 16 expand through TEXA, and the indexed
+	// formats (8, 4, 8H, 4HL, 4HH) look the stored index up in m_pCLUT32.
+
+	__forceinline DWORD readTexel32(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return m_vm32[addr];
+	}
+
+	__forceinline DWORD readTexel24(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return Expand24To32(m_vm32[addr], TEXA);
+	}
+
+	__forceinline DWORD readTexel16(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return Expand16To32(m_vm16[addr], TEXA);
+	}
+
+	__forceinline DWORD readTexel16S(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return Expand16To32(m_vm16[addr], TEXA);
+	}
+
+	__forceinline DWORD readTexel8(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return m_pCLUT32[readPixel8(addr)];
+	}
+
+	__forceinline DWORD readTexel4(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return m_pCLUT32[readPixel4(addr)];
+	}
+
+	__forceinline DWORD readTexel8H(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return m_pCLUT32[readPixel8H(addr)];
+	}
+
+	__forceinline DWORD readTexel4HL(DWORD addr, GIFRegTEXA& TEXA)
+	{
+		return m_pCLUT32[readPixel4HL(addr)];
+	}
+
+	__forceinline DWORD readTexel4HH(DWORD addr, GIFRegTEXA& TEXA) 
+	{
+		return m_pCLUT32[readPixel4HH(addr)];
+	}
+
+	// Texel readers by coordinate: the index is derived from TEX0.TBP0 and TEX0.TBW.
+
+	__forceinline DWORD readTexel32(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel32(pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel24(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel24(pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel16(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel16(pixelAddress16(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel16S(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel16S(pixelAddress16S(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel8(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel8(pixelAddress8(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel4(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel4(pixelAddress4(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel8H(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel8H(pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel4HL(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel4HL(pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	__forceinline DWORD readTexel4HH(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readTexel4HH(pixelAddress32(x, y, TEX0.TBP0, TEX0.TBW), TEXA);
+	}
+
+	// "P" texel readers: return the raw stored value (16-bit color or palette
+	// index) without TEXA expansion or CLUT lookup; TEXA is unused.
+
+	__forceinline DWORD readTexel16P(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel16(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	__forceinline DWORD readTexel16SP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel16S(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	__forceinline DWORD readTexel8P(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel8(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	__forceinline DWORD readTexel8HP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel8H(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	__forceinline DWORD readTexel4P(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel4(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	__forceinline DWORD readTexel4HLP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel4HL(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	__forceinline DWORD readTexel4HHP(int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		return readPixel4HH(x, y, TEX0.TBP0, TEX0.TBW);
+	}
+
+	// Format-dispatching wrappers: select the accessor with a switch on PSM.
+	// An unhandled PSM trips ASSERT(0) and then falls back to the 32-bit accessor.
+
+	__forceinline DWORD readPixelX(int PSM, DWORD addr)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: return readPixel32(addr); 
+		case PSM_PSMCT24: return readPixel24(addr); 
+		case PSM_PSMCT16: return readPixel16(addr);
+		case PSM_PSMCT16S: return readPixel16S(addr);
+		case PSM_PSMT8: return readPixel8(addr);
+		case PSM_PSMT4: return readPixel4(addr);
+		case PSM_PSMT8H: return readPixel8H(addr);
+		case PSM_PSMT4HL: return readPixel4HL(addr);
+		case PSM_PSMT4HH: return readPixel4HH(addr);
+		case PSM_PSMZ32: return readPixel32Z(addr);
+		case PSM_PSMZ24: return readPixel24Z(addr);
+		case PSM_PSMZ16: return readPixel16Z(addr);
+		case PSM_PSMZ16S: return readPixel16SZ(addr);
+		default: ASSERT(0); return readPixel32(addr);
+		}
+	}
+
+	// The Z formats are not handled by the texel dispatchers.
+	__forceinline DWORD readTexelX(int PSM, DWORD addr, GIFRegTEXA& TEXA)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: return readTexel32(addr, TEXA);
+		case PSM_PSMCT24: return readTexel24(addr, TEXA);
+		case PSM_PSMCT16: return readTexel16(addr, TEXA);
+		case PSM_PSMCT16S: return readTexel16S(addr, TEXA);
+		case PSM_PSMT8: return readTexel8(addr, TEXA);
+		case PSM_PSMT4: return readTexel4(addr, TEXA);
+		case PSM_PSMT8H: return readTexel8H(addr, TEXA);
+		case PSM_PSMT4HL: return readTexel4HL(addr, TEXA);
+		case PSM_PSMT4HH: return readTexel4HH(addr, TEXA);
+		default: ASSERT(0); return readTexel32(addr, TEXA);
+		}
+	}
+
+	__forceinline DWORD readTexelX(int PSM, int x, int y, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: return readTexel32(x, y, TEX0, TEXA);
+		case PSM_PSMCT24: return readTexel24(x, y, TEX0, TEXA);
+		case PSM_PSMCT16: return readTexel16(x, y, TEX0, TEXA);
+		case PSM_PSMCT16S: return readTexel16S(x, y, TEX0, TEXA);
+		case PSM_PSMT8: return readTexel8(x, y, TEX0, TEXA);
+		case PSM_PSMT4: return readTexel4(x, y, TEX0, TEXA);
+		case PSM_PSMT8H: return readTexel8H(x, y, TEX0, TEXA);
+		case PSM_PSMT4HL: return readTexel4HL(x, y, TEX0, TEXA);
+		case PSM_PSMT4HH: return readTexel4HH(x, y, TEX0, TEXA);
+		default: ASSERT(0); return readTexel32(x, y, TEX0, TEXA);
+		}
+	}
+
+	__forceinline void writePixelX(int PSM, DWORD addr, DWORD c)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: writePixel32(addr, c); break; 
+		case PSM_PSMCT24: writePixel24(addr, c); break; 
+		case PSM_PSMCT16: writePixel16(addr, c); break;
+		case PSM_PSMCT16S: writePixel16S(addr, c); break;
+		case PSM_PSMT8: writePixel8(addr, c); break;
+		case PSM_PSMT4: writePixel4(addr, c); break;
+		case PSM_PSMT8H: writePixel8H(addr, c); break;
+		case PSM_PSMT4HL: writePixel4HL(addr, c); break;
+		case PSM_PSMT4HH: writePixel4HH(addr, c); break;
+		case PSM_PSMZ32: writePixel32Z(addr, c); break;
+		case PSM_PSMZ24: writePixel24Z(addr, c); break;
+		case PSM_PSMZ16: writePixel16Z(addr, c); break;
+		case PSM_PSMZ16S: writePixel16SZ(addr, c); break;
+		default: ASSERT(0); writePixel32(addr, c); break;
+		}
+	}
+
+	// Takes a 32-bit color; only the four CT formats are handled, and the 16-bit
+	// ones are packed via writeFrame16/16S.
+	__forceinline void writeFrameX(int PSM, DWORD addr, DWORD c)
+	{
+		switch(PSM)
+		{
+		case PSM_PSMCT32: writePixel32(addr, c); break; 
+		case PSM_PSMCT24: writePixel24(addr, c); break; 
+		case PSM_PSMCT16: writeFrame16(addr, c); break;
+		case PSM_PSMCT16S: writeFrame16S(addr, c); break;
+		default: ASSERT(0); writePixel32(addr, c); break;
+		}
+	}
+
+	// FillRect
+	// NOTE(review): defined in the .cpp; presumably fills rectangle r of the buffer
+	// at fbp/fbw with color c in format psm and reports success - confirm.
+
+	bool FillRect(const CRect& r, DWORD c, DWORD psm, DWORD fbp, DWORD fbw);
+
+	// CLUT
+	// InvalidateCLUT() only raises the "may be dirty" flag; the remaining CLUT
+	// routines are defined in the .cpp.
+
+	void InvalidateCLUT() {m_fCLUTMayBeDirty = true;}
+	bool IsCLUTDirty(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT);
+	bool WriteCLUT(GIFRegTEX0 TEX0, GIFRegTEXCLUT TEXCLUT);
+
+	void ReadCLUT(GIFRegTEX0 TEX0, DWORD* pCLUT32);
+	void SetupCLUT(GIFRegTEX0 TEX0);
+
+	// expands 16->32
+
+	void ReadCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA, DWORD* pCLUT32);
+	void SetupCLUT32(GIFRegTEX0 TEX0, GIFRegTEXA TEXA);
+	void CopyCLUT32(DWORD* pCLUT32, int nPaletteEntries);
+
+	// Per-format texture upload (swizzle) routines, matching the SwizzleTexture
+	// pointer type; defined in the .cpp.
+
+	void SwizzleTexture32(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture24(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture16(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture16S(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture8(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture8H(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture4(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture4HL(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTexture4HH(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+	void SwizzleTextureX(int& tx, int& ty, BYTE* src, int len, GIFRegBITBLTBUF& BITBLTBUF, GIFRegTRXPOS& TRXPOS, GIFRegTRXREG& TRXREG);
+
+	// Per-format texture read-back (unswizzle) routines, matching the
+	// unSwizzleTexture pointer type: rectangle r is written to dst with row
+	// stride dstpitch. Defined in the .cpp.
+
+	void unSwizzleTexture32(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture24(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture16(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture16S(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8H(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HL(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HH(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+
+	void ReadTexture(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
+
+	// 32/16/8P
+	// NOTE(review): presumably the output element width per source format for the
+	// "P" variants (which keep 16-bit colors and palette indices unexpanded,
+	// like the readTexel*P helpers above) - confirm in the .cpp.
+
+	void unSwizzleTexture16P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture16SP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8HP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4P(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HLP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HHP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+
+	void ReadTextureP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
+
+	// 32/16
+	// NOTE(review): "NP" variants exist only for the indexed formats; presumably
+	// they resolve the palette into 32- or 16-bit colors - confirm in the .cpp.
+
+	void unSwizzleTexture8NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture8HNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4NP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HLNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+	void unSwizzleTexture4HHNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA);
+
+	void ReadTextureNP(const CRect& r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP);
+
+	// NOTE(review): m_xtbl/m_ytbl are presumably scratch coordinate tables for the
+	// templated ReadTexture below - confirm in the .cpp.
+
+	static DWORD m_xtbl[1024], m_ytbl[1024]; 
+
+	// Shared implementation behind the ReadTexture* entry points: T is the output
+	// element type, rt the per-texel reader and st the rectangle unswizzler to use.
+	template<typename T> void ReadTexture(CRect r, BYTE* dst, int dstpitch, GIFRegTEX0& TEX0, GIFRegTEXA& TEXA, GIFRegCLAMP& CLAMP, readTexel rt, unSwizzleTexture st);
+
+	// HRESULT SaveBMP(ID3D10Device* dev, LPCTSTR fn, DWORD bp, DWORD bw, DWORD psm, int w, int h);
+};
+
+#pragma warning(default: 4244)
\ No newline at end of file
diff --git a/gsdx/GSPerfMon.cpp b/gsdx/GSPerfMon.cpp
new file mode 100644
index 0000000..9e1c549
--- /dev/null
+++ b/gsdx/GSPerfMon.cpp
@@ -0,0 +1,103 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with this Program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSPerfMon.h"
+
+extern "C" unsigned __int64 __rdtsc();
+
+// Zero-initializes every timing field, counter, statistic and warning flag.
+// The initializers are listed in the order the members are declared in GSPerfMon.h.
+GSPerfMon::GSPerfMon()
+	: m_begin(0)
+	, m_total(0)
+	, m_start(0) // used to be left uninitialized although Stop() tests it
+	, m_frame(0)
+	, m_lastframe(0)
+	, m_count(0)
+{
+	memset(m_counters, 0, sizeof(m_counters));
+	memset(m_stats, 0, sizeof(m_stats));
+	memset(m_warnings, 0, sizeof(m_warnings));
+}
+
+// Accumulates a sample into counter c.
+// For every counter except Frame, val is simply added. For Frame, val is ignored: the clock() ticks
+// elapsed since the previous Frame sample are added instead and the frame counters are advanced.
+void GSPerfMon::Put(counter_t c, double val)
+{
+	if(c != Frame)
+	{
+		m_counters[c] += val;
+
+		return;
+	}
+
+	const clock_t ticks = clock();
+
+	// m_lastframe stays 0 until the first Frame sample has been recorded
+	if(m_lastframe != 0)
+	{
+		m_counters[c] += ticks - m_lastframe;
+	}
+
+	m_lastframe = ticks;
+	m_frame++;
+	m_count++;
+}
+
+// Turns the accumulated counters into per-frame averages (m_stats) and clears the accumulators.
+// When no Frame sample arrived since the last call the previous statistics are kept.
+void GSPerfMon::Update()
+{
+	if(m_count > 0)
+	{
+		const int frames = m_count;
+
+		for(int idx = 0; idx < countof(m_counters); idx++)
+		{
+			m_stats[idx] = m_counters[idx] / frames;
+		}
+
+		m_count = 0;
+	}
+
+	// the accumulators are cleared even if no average could be computed
+	memset(m_counters, 0, sizeof(m_counters));
+}
+
+// Records the time stamp counter as the start of a timed section; the first call after CPU()
+// (or after construction) also marks the beginning of the whole measuring period.
+void GSPerfMon::Start()
+{
+	const UINT64 now = __rdtsc();
+
+	m_start = now;
+
+	if(m_begin == 0) m_begin = now;
+}
+
+// Ends the timed section opened by Start() and adds its length to m_total.
+// Does nothing when no section is open (m_start is 0).
+void GSPerfMon::Stop()
+{
+	if(!(m_start > 0)) return;
+
+	const UINT64 now = __rdtsc();
+
+	m_total += now - m_start;
+	m_start = 0;
+}
+
+// Returns the share (in percent) of time stamp counter ticks spent inside timed sections since
+// the measuring period began, then restarts the measuring period.
+int GSPerfMon::CPU()
+{
+	const UINT64 elapsed = __rdtsc() - m_begin;
+
+	const int percent = (int)(100 * m_total / elapsed);
+
+	// start over: the next Start() call sets m_begin again
+	m_total = 0;
+	m_start = 0;
+	m_begin = 0;
+
+	return percent;
+}
\ No newline at end of file
diff --git a/gsdx/GSPerfMon.h b/gsdx/GSPerfMon.h
new file mode 100644
index 0000000..07cc15c
--- /dev/null
+++ b/gsdx/GSPerfMon.h
@@ -0,0 +1,65 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with this Program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "x86.h"
+
+// Collects per-frame performance counters, sticky warning flags and a rdtsc based CPU usage figure.
+class GSPerfMon
+{
+public:
+	// Counter identifiers; CounterLast is the number of counters, not a counter itself.
+	enum counter_t
+	{
+		Frame,
+		Prim,
+		Draw,
+		Swizzle,
+		Unswizzle,
+		Unswizzle2,
+		Texture,
+		ConvertRT2T,
+		ReadRT,
+		WriteRT,
+		WriteTexture,
+		CounterLast
+	};
+
+	// Warning identifiers; WarningLast is the number of warnings, not a warning itself.
+	enum warning_t
+	{
+		DATE,
+		PABE,
+		ABE,
+		COLCLAMP,
+		DepthTexture,
+		WarningLast
+	};
+
+protected:
+	double m_counters[CounterLast];	// sums accumulated by Put() since the last Update()
+	double m_stats[CounterLast];	// averages computed by Update(), returned by Get(counter_t)
+	bool m_warnings[WarningLast];	// set by Put(warning_t), cleared when read by Get(warning_t)
+	UINT64 m_begin, m_total, m_start, m_frame;
+	clock_t m_lastframe;
+	int m_count;
+
+	// Open and close a timed section; only reachable through GSPerfMonAutoTimer.
+	void Start();
+	void Stop();
+
+	friend class GSPerfMonAutoTimer;
+
+public:
+	GSPerfMon();
+
+	void SetFrame(UINT64 frame)
+	{
+		m_frame = frame;
+	}
+
+	UINT64 GetFrame()
+	{
+		return m_frame;
+	}
+
+	void Put(counter_t c, double val = 0);
+
+	double Get(counter_t c)
+	{
+		return m_stats[c];
+	}
+
+	void Put(warning_t c)
+	{
+		m_warnings[c] = true;
+	}
+
+	// Reading a warning also clears it.
+	bool Get(warning_t c)
+	{
+		const bool wasSet = m_warnings[c];
+
+		m_warnings[c] = false;
+
+		return wasSet;
+	}
+
+	void Update();
+	int CPU();
+};
+
+// Scope guard: calls GSPerfMon::Start() on construction and GSPerfMon::Stop() on destruction.
+class GSPerfMonAutoTimer
+{
+	GSPerfMon* m_pm;
+
+public:
+	GSPerfMonAutoTimer(GSPerfMon& pm)
+	{
+		m_pm = &pm;
+		m_pm->Start();
+	}
+
+	~GSPerfMonAutoTimer()
+	{
+		m_pm->Stop();
+	}
+};
diff --git a/gsdx/GSScale.h b/gsdx/GSScale.h
new file mode 100644
index 0000000..efe849b
--- /dev/null
+++ b/gsdx/GSScale.h
@@ -0,0 +1,33 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with this Program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include <math.h>
+
+// A pair of horizontal (x) and vertical (y) scale factors; default constructed as 1:1.
+struct GSScale
+{
+	float x, y;
+
+	// the former "struct GSScale(...)" spelling of the constructors is only accepted by MSVC
+	GSScale() : x(1), y(1) {}
+	GSScale(float x, float y) : x(x), y(y) {}
+
+	// Approximate comparison: both factors have to be closer than 0.001 to each other.
+	// const-qualified so that const objects and temporaries can be compared as well.
+	bool operator == (const GSScale& s) const {return fabs(x - s.x) < 0.001 && fabs(y - s.y) < 0.001;}
+};
+
diff --git a/gsdx/GSState.cpp b/gsdx/GSState.cpp
new file mode 100644
index 0000000..88bdc08
--- /dev/null
+++ b/gsdx/GSState.cpp
@@ -0,0 +1,1314 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with this Program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSState.h"
+
+// Maps the privileged registers found at base, allocates the transfer staging buffer and brings
+// the drawing environment and both register handler tables into their initial state.
+//
+// base      - start of the privileged register block, must not be NULL
+// mt        - stored in m_mt; when true the SIGNAL/FINISH/LABEL handlers return without doing anything
+// irq       - callback the SIGNAL/FINISH handlers invoke when the matching IMR mask bit is clear, may be NULL
+// nloophack - stored in m_nloophack
+GSState::GSState(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+	: m_mt(mt)
+	, m_irq(irq)
+	, m_nloophack(nloophack)
+	, m_path3hack(0)
+	, m_q(1.0f)
+	, m_version(4)
+	, m_vmsize(4 * 1024 * 1024)
+{
+	m_sssize = sizeof(m_version) + sizeof(m_env) + sizeof(m_v) + sizeof(m_x) + sizeof(m_y) + m_vmsize + sizeof(m_path) + sizeof(m_q);
+
+	ASSERT(base);
+
+	PMODE = (GSRegPMODE*)(base + GS_PMODE);
+	SMODE1 = (GSRegSMODE1*)(base + GS_SMODE1);
+	SMODE2 = (GSRegSMODE2*)(base + GS_SMODE2);
+	// SRFSH = (GSRegPMODE*)(base + GS_SRFSH);
+	// SYNCH1 = (GSRegPMODE*)(base + GS_SYNCH1);
+	// SYNCH2 = (GSRegPMODE*)(base + GS_SYNCH2);
+	// SYNCV = (GSRegPMODE*)(base + GS_SYNCV);
+	DISPFB[0] = (GSRegDISPFB*)(base + GS_DISPFB1);
+	DISPFB[1] = (GSRegDISPFB*)(base + GS_DISPFB2);
+	DISPLAY[0] = (GSRegDISPLAY*)(base + GS_DISPLAY1);
+	DISPLAY[1] = (GSRegDISPLAY*)(base + GS_DISPLAY2);
+	EXTBUF = (GSRegEXTBUF*)(base + GS_EXTBUF);
+	EXTDATA = (GSRegEXTDATA*)(base + GS_EXTDATA);
+	EXTWRITE = (GSRegEXTWRITE*)(base + GS_EXTWRITE);
+	BGCOLOR = (GSRegBGCOLOR*)(base + GS_BGCOLOR);
+	CSR = (GSRegCSR*)(base + GS_CSR);
+	IMR = (GSRegIMR*)(base + GS_IMR);
+	BUSDIR = (GSRegBUSDIR*)(base + GS_BUSDIR);
+	SIGLBLID = (GSRegSIGLBLID*)(base + GS_SIGLBLID);
+
+//	CSR->rREV = 0x20;
+
+	m_x = m_y = 0;
+	m_bytes = 0;
+	m_maxbytes = 1024 * 1024 * 4;
+	m_buff = (BYTE*)_aligned_malloc(m_maxbytes, 16);
+
+	ASSERT(m_buff); // Write() copies into this buffer without checking it
+
+	Reset();
+
+	// Reset() zero-fills m_env, so these defaults have to be applied after it. Setting AC before
+	// the call (as was done previously) left PRMODECONT.AC at 0 while PRIM pointed at m_env.PRIM,
+	// a combination GIFRegHandlerPRMODECONT never produces.
+
+	PRIM = &m_env.PRIM;
+	m_env.PRMODECONT.AC = 1;
+
+	ResetHandlers();
+}
+
+GSState::~GSState() // Releases the transfer staging buffer allocated by the constructor
+{
+	_aligned_free(m_buff); // pairs with the _aligned_malloc() call in the constructor
+}
+
+// Zero-fills the drawing environment, the path state and the pending vertex, selects context 0
+// and re-points the per-context format tables at the entries matching the (now zeroed) PSM fields.
+// NOTE(review): this also clears PRMODECONT.AC and leaves PRIM untouched - confirm that is intended.
+void GSState::Reset()
+{
+	memset(&m_env, 0, sizeof(m_env));
+	memset(m_path, 0, sizeof(m_path));
+	memset(&m_v, 0, sizeof(m_v));
+
+//	PRIM = &m_env.PRIM;
+//	m_env.PRMODECONT.AC = 1;
+
+	m_context = &m_env.CTXT[0];
+
+	// same three assignments for CTXT[0] and CTXT[1]
+	for(int ctx = 0; ctx < 2; ctx++)
+	{
+		m_env.CTXT[ctx].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[ctx].FRAME.PSM];
+		m_env.CTXT[ctx].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[ctx].ZBUF.PSM];
+		m_env.CTXT[ctx].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[ctx].TEX0.PSM];
+	}
+}
+
+void GSState::ResetHandlers() // Rebuilds both dispatch tables: every slot gets the Null handler first, then the known registers are bound
+{
+	for(int i = 0; i < countof(m_fpGIFPackedRegHandlers); i++)
+	{
+		m_fpGIFPackedRegHandlers[i] = &GSState::GIFPackedRegHandlerNull;
+	}
+	// table indexed by GIF_REG_* ids, taking GIFPackedReg* arguments
+	m_fpGIFPackedRegHandlers[GIF_REG_PRIM] = &GSState::GIFPackedRegHandlerPRIM;
+	m_fpGIFPackedRegHandlers[GIF_REG_RGBA] = &GSState::GIFPackedRegHandlerRGBA;
+	m_fpGIFPackedRegHandlers[GIF_REG_STQ] = &GSState::GIFPackedRegHandlerSTQ;
+	m_fpGIFPackedRegHandlers[GIF_REG_UV] = &GSState::GIFPackedRegHandlerUV;
+	m_fpGIFPackedRegHandlers[GIF_REG_XYZF2] = &GSState::GIFPackedRegHandlerXYZF2;
+	m_fpGIFPackedRegHandlers[GIF_REG_XYZ2] = &GSState::GIFPackedRegHandlerXYZ2;
+	m_fpGIFPackedRegHandlers[GIF_REG_TEX0_1] = &GSState::GIFPackedRegHandlerTEX0<0>;
+	m_fpGIFPackedRegHandlers[GIF_REG_TEX0_2] = &GSState::GIFPackedRegHandlerTEX0<1>;
+	m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_1] = &GSState::GIFPackedRegHandlerCLAMP<0>;
+	m_fpGIFPackedRegHandlers[GIF_REG_CLAMP_2] = &GSState::GIFPackedRegHandlerCLAMP<1>;
+	m_fpGIFPackedRegHandlers[GIF_REG_FOG] = &GSState::GIFPackedRegHandlerFOG;
+	m_fpGIFPackedRegHandlers[GIF_REG_XYZF3] = &GSState::GIFPackedRegHandlerXYZF3;
+	m_fpGIFPackedRegHandlers[GIF_REG_XYZ3] = &GSState::GIFPackedRegHandlerXYZ3;
+	m_fpGIFPackedRegHandlers[GIF_REG_A_D] = &GSState::GIFPackedRegHandlerA_D;
+	m_fpGIFPackedRegHandlers[GIF_REG_NOP] = &GSState::GIFPackedRegHandlerNOP;
+	// second table, indexed by GIF_A_D_REG_* ids; GIFPackedRegHandlerA_D dispatches through it using the ADDR field
+	for(int i = 0; i < countof(m_fpGIFRegHandlers); i++)
+	{
+		m_fpGIFRegHandlers[i] = &GSState::GIFRegHandlerNull;
+	}
+
+	m_fpGIFRegHandlers[GIF_A_D_REG_PRIM] = &GSState::GIFRegHandlerPRIM;
+	m_fpGIFRegHandlers[GIF_A_D_REG_RGBAQ] = &GSState::GIFRegHandlerRGBAQ;
+	m_fpGIFRegHandlers[GIF_A_D_REG_ST] = &GSState::GIFRegHandlerST;
+	m_fpGIFRegHandlers[GIF_A_D_REG_UV] = &GSState::GIFRegHandlerUV;
+	m_fpGIFRegHandlers[GIF_A_D_REG_XYZF2] = &GSState::GIFRegHandlerXYZF2;
+	m_fpGIFRegHandlers[GIF_A_D_REG_XYZ2] = &GSState::GIFRegHandlerXYZ2;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_1] = &GSState::GIFRegHandlerTEX0<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEX0_2] = &GSState::GIFRegHandlerTEX0<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_1] = &GSState::GIFRegHandlerCLAMP<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_CLAMP_2] = &GSState::GIFRegHandlerCLAMP<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FOG] = &GSState::GIFRegHandlerFOG;
+	m_fpGIFRegHandlers[GIF_A_D_REG_XYZF3] = &GSState::GIFRegHandlerXYZF3;
+	m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = &GSState::GIFRegHandlerXYZ3;
+	m_fpGIFRegHandlers[GIF_A_D_REG_NOP] = &GSState::GIFRegHandlerNOP;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_1] = &GSState::GIFRegHandlerTEX1<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEX1_2] = &GSState::GIFRegHandlerTEX1<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_1] = &GSState::GIFRegHandlerTEX2<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEX2_2] = &GSState::GIFRegHandlerTEX2<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_1] = &GSState::GIFRegHandlerXYOFFSET<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_XYOFFSET_2] = &GSState::GIFRegHandlerXYOFFSET<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_PRMODECONT] = &GSState::GIFRegHandlerPRMODECONT;
+	m_fpGIFRegHandlers[GIF_A_D_REG_PRMODE] = &GSState::GIFRegHandlerPRMODE;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEXCLUT] = &GSState::GIFRegHandlerTEXCLUT;
+	m_fpGIFRegHandlers[GIF_A_D_REG_SCANMSK] = &GSState::GIFRegHandlerSCANMSK;
+	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_1] = &GSState::GIFRegHandlerMIPTBP1<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP1_2] = &GSState::GIFRegHandlerMIPTBP1<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_1] = &GSState::GIFRegHandlerMIPTBP2<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_MIPTBP2_2] = &GSState::GIFRegHandlerMIPTBP2<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEXA] = &GSState::GIFRegHandlerTEXA;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FOGCOL] = &GSState::GIFRegHandlerFOGCOL;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEXFLUSH] = &GSState::GIFRegHandlerTEXFLUSH;
+	m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_1] = &GSState::GIFRegHandlerSCISSOR<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_SCISSOR_2] = &GSState::GIFRegHandlerSCISSOR<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_1] = &GSState::GIFRegHandlerALPHA<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_ALPHA_2] = &GSState::GIFRegHandlerALPHA<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_DIMX] = &GSState::GIFRegHandlerDIMX;
+	m_fpGIFRegHandlers[GIF_A_D_REG_DTHE] = &GSState::GIFRegHandlerDTHE;
+	m_fpGIFRegHandlers[GIF_A_D_REG_COLCLAMP] = &GSState::GIFRegHandlerCOLCLAMP;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEST_1] = &GSState::GIFRegHandlerTEST<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TEST_2] = &GSState::GIFRegHandlerTEST<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_PABE] = &GSState::GIFRegHandlerPABE;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FBA_1] = &GSState::GIFRegHandlerFBA<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FBA_2] = &GSState::GIFRegHandlerFBA<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_1] = &GSState::GIFRegHandlerFRAME<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FRAME_2] = &GSState::GIFRegHandlerFRAME<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_1] = &GSState::GIFRegHandlerZBUF<0>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_ZBUF_2] = &GSState::GIFRegHandlerZBUF<1>;
+	m_fpGIFRegHandlers[GIF_A_D_REG_BITBLTBUF] = &GSState::GIFRegHandlerBITBLTBUF;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TRXPOS] = &GSState::GIFRegHandlerTRXPOS;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TRXREG] = &GSState::GIFRegHandlerTRXREG;
+	m_fpGIFRegHandlers[GIF_A_D_REG_TRXDIR] = &GSState::GIFRegHandlerTRXDIR;
+	m_fpGIFRegHandlers[GIF_A_D_REG_HWREG] = &GSState::GIFRegHandlerHWREG;
+	m_fpGIFRegHandlers[GIF_A_D_REG_SIGNAL] = &GSState::GIFRegHandlerSIGNAL;
+	m_fpGIFRegHandlers[GIF_A_D_REG_FINISH] = &GSState::GIFRegHandlerFINISH;
+	m_fpGIFRegHandlers[GIF_A_D_REG_LABEL] = &GSState::GIFRegHandlerLABEL;
+}
+
+// Returns DISPLAY[i]'s DX/DY offsets divided by the magnification factors (MAGH + 1, MAGV + 1).
+// i has to be 0 or 1.
+CPoint GSState::GetDisplayPos(int i)
+{
+	ASSERT(i >= 0 && i < 2);
+
+	const GSRegDISPLAY* disp = DISPLAY[i];
+
+	CPoint origin;
+
+	origin.x = disp->DX / (disp->MAGH + 1);
+	origin.y = disp->DY / (disp->MAGV + 1);
+
+	return origin;
+}
+
+// Returns DISPLAY[i]'s (DW + 1, DH + 1) divided by the magnification factors, with the height
+// rounded up to an even number. i has to be 0 or 1.
+CSize GSState::GetDisplaySize(int i)
+{
+	ASSERT(i >= 0 && i < 2);
+
+	const GSRegDISPLAY* disp = DISPLAY[i];
+
+	CSize size;
+
+	size.cx = (disp->DW + 1) / (disp->MAGH + 1);
+	size.cy = (disp->DH + 1) / (disp->MAGV + 1);
+
+	// odd heights are bumped to the next even value
+	if((size.cy & 1) != 0)
+	{
+		size.cy++;
+	}
+
+	return size;
+}
+
+// Combines GetDisplayPos(i) and GetDisplaySize(i) into one rectangle.
+CRect GSState::GetDisplayRect(int i)
+{
+	const CPoint origin = GetDisplayPos(i);
+	const CSize size = GetDisplaySize(i);
+
+	return CRect(origin, size);
+}
+
+// Display position of output 1 if it is enabled, otherwise of output 0.
+// NOTE(review): declared to return CSize although it carries a position (CPoint) - confirm against the header.
+CSize GSState::GetDisplayPos()
+{
+	const int output = IsEnabled(1) ? 1 : 0;
+
+	return GetDisplayPos(output);
+}
+
+// Display size of output 1 if it is enabled, otherwise of output 0.
+CSize GSState::GetDisplaySize()
+{
+	const int output = IsEnabled(1) ? 1 : 0;
+
+	return GetDisplaySize(output);
+}
+
+// Display rectangle of output 1 if it is enabled, otherwise of output 0.
+CRect GSState::GetDisplayRect()
+{
+	const int output = IsEnabled(1) ? 1 : 0;
+
+	return GetDisplayRect(output);
+}
+
+// Returns the (DBX, DBY) offsets stored in DISPFB[i]. i has to be 0 or 1.
+CPoint GSState::GetFramePos(int i)
+{
+	ASSERT(i >= 0 && i < 2);
+
+	const GSRegDISPFB* fb = DISPFB[i];
+
+	return CPoint(fb->DBX, fb->DBY);
+}
+
+// Same as GetDisplaySize(i), except that the height is halved when both SMODE2->INT and
+// SMODE2->FFMD are set (and the height is larger than 1).
+CSize GSState::GetFrameSize(int i)
+{
+	CSize size = GetDisplaySize(i);
+
+	const bool halve = SMODE2->INT && SMODE2->FFMD;
+
+	if(halve && size.cy > 1)
+	{
+		size.cy >>= 1;
+	}
+
+	return size;
+}
+
+// Combines GetFramePos(i) and GetFrameSize(i) into one rectangle.
+CRect GSState::GetFrameRect(int i)
+{
+	const CPoint origin = GetFramePos(i);
+	const CSize size = GetFrameSize(i);
+
+	return CRect(origin, size);
+}
+
+// Frame position of output 1 if it is enabled, otherwise of output 0.
+// NOTE(review): declared to return CSize although it carries a position (CPoint) - confirm against the header.
+CSize GSState::GetFramePos()
+{
+	const int output = IsEnabled(1) ? 1 : 0;
+
+	return GetFramePos(output);
+}
+
+// Frame size of output 1 if it is enabled, otherwise of output 0.
+CSize GSState::GetFrameSize()
+{
+	const int output = IsEnabled(1) ? 1 : 0;
+
+	return GetFrameSize(output);
+}
+
+// Frame rectangle of output 1 if it is enabled, otherwise of output 0.
+CRect GSState::GetFrameRect()
+{
+	const int output = IsEnabled(1) ? 1 : 0;
+
+	return GetFrameRect(output);
+}
+
+// Output i (0 or 1) counts as enabled when its PMODE enable bit (EN1 / EN2) is set and its
+// DISPLAY register has a non-zero DW or DH.
+bool GSState::IsEnabled(int i)
+{
+	ASSERT(i >= 0 && i < 2);
+
+	switch(i)
+	{
+	case 0:
+		if(PMODE->EN1)
+		{
+			return DISPLAY[0]->DW || DISPLAY[0]->DH;
+		}
+		break;
+	case 1:
+		if(PMODE->EN2)
+		{
+			return DISPLAY[1]->DW || DISPLAY[1]->DH;
+		}
+		break;
+	}
+
+	return false;
+}
+
+// Derives the frame rate from the video mode registers: 50 when bit 0 of SMODE1->CMOD is set,
+// 60 otherwise (presumably PAL vs. NTSC - confirm), halved unless SMODE2->INT is set.
+int GSState::GetFPS()
+{
+	const int rate = (SMODE1->CMOD & 1) ? 50 : 60;
+	const int divisor = SMODE2->INT ? 1 : 2;
+
+	return rate / divisor;
+}
+
+//
+
+static __m128i _000000ff = _mm_set1_epi32(0x000000ff); // keeps the low 8 bits of every 32-bit lane; used by GIFPackedRegHandlerRGBA
+static __m128i _00003fff = _mm_set1_epi32(0x00003fff); // keeps the low 14 bits of every 32-bit lane; used by GIFPackedRegHandlerUV
+
+// GIFPackedRegHandler*
+
+void GSState::GIFPackedRegHandlerNull(GIFPackedReg* r) // Default entry of m_fpGIFPackedRegHandlers (see ResetHandlers): ignores the data
+{
+	// ASSERT(0);
+}
+
+// PACKED PRIM: copies the packed PRIM field into a temporary 64-bit register image and hands
+// that to the regular PRIM handler.
+void GSState::GIFPackedRegHandlerPRIM(GIFPackedReg* r)
+{
+	ASSERT(r->PRIM.PRIM < 7);
+
+	GIFReg unpacked;
+
+	unpacked.PRIM.i64 = r->PRIM.PRIM;
+
+	GIFRegHandlerPRIM(&unpacked);
+}
+
+void GSState::GIFPackedRegHandlerRGBA(GIFPackedReg* r) // PACKED RGBA: squeezes the four color components into the low dword of m_v.RGBAQ and takes Q from m_q
+{
+#if defined(_M_AMD64) || _M_IX86_FP >= 2
+	// SSE2 path: the mask below treats each component as the low byte of its own 32-bit lane
+	__m128i r0 = _mm_loadu_si128((__m128i*)r);
+	r0 = _mm_and_si128(r0, _000000ff); // drop everything except the low byte of each lane
+	r0 = _mm_packs_epi32(r0, r0); // 32 -> 16 bits per component
+	r0 = _mm_packus_epi16(r0, r0); // 16 -> 8 bits per component
+	m_v.RGBAQ.ai32[0] = _mm_cvtsi128_si32(r0); // the low dword now holds the four bytes
+
+#else
+	// scalar fallback: copy the components one by one
+	m_v.RGBAQ.R = r->RGBA.R;
+	m_v.RGBAQ.G = r->RGBA.G;
+	m_v.RGBAQ.B = r->RGBA.B;
+	m_v.RGBAQ.A = r->RGBA.A;
+
+#endif
+
+	m_v.RGBAQ.Q = m_q; // Q value remembered by GIFPackedRegHandlerSTQ (1.0f after construction)
+}
+
+void GSState::GIFPackedRegHandlerSTQ(GIFPackedReg* r) // PACKED STQ: stores S/T into m_v.ST and remembers Q in m_q
+{
+#if defined(_M_AMD64)
+	// 64-bit build: the low 64 bits of the packed data are copied with a single move
+	m_v.ST.i64 = r->ai64[0];
+
+#elif _M_IX86_FP >= 2
+	// 32-bit SSE2 build: same 64-bit copy through an XMM register
+	_mm_storel_epi64((__m128i*)&m_v.ST.i64, _mm_loadl_epi64((__m128i*)r));
+
+#else
+	// scalar fallback
+	m_v.ST.S = r->STQ.S;
+	m_v.ST.T = r->STQ.T;
+
+#endif
+
+	m_q = r->STQ.Q; // picked up by the next GIFPackedRegHandlerRGBA call
+}
+
+void GSState::GIFPackedRegHandlerUV(GIFPackedReg* r) // PACKED UV: stores the U and V fields into m_v.UV
+{
+#if defined(_M_AMD64) || _M_IX86_FP >= 2
+	// SSE2 path: mask each 32-bit lane to 14 bits, then pack the lanes to 16 bits so U and V share one dword
+	__m128i r0 = _mm_loadu_si128((__m128i*)r);
+	r0 = _mm_and_si128(r0, _00003fff);
+	r0 = _mm_packs_epi32(r0, r0);
+	m_v.UV.ai32[0] = _mm_cvtsi128_si32(r0);
+
+#else
+	// scalar fallback
+	m_v.UV.U = r->UV.U;
+	m_v.UV.V = r->UV.V;
+
+#endif
+}
+
+void GSState::GIFPackedRegHandlerXYZF2(GIFPackedReg* r) // PACKED XYZF2: copies position and fog from the packed fields into m_v, then kicks the vertex
+{
+	m_v.XYZ.X = r->XYZF2.X;
+	m_v.XYZ.Y = r->XYZF2.Y;
+	m_v.XYZ.Z = r->XYZF2.Z;
+	m_v.FOG.F = r->XYZF2.F;
+	// the packed ADC field is handed to VertexKick(); GIFRegHandlerXYZF2 passes false and GIFRegHandlerXYZF3 true instead
+	VertexKick(r->XYZF2.ADC);
+}
+
+void GSState::GIFPackedRegHandlerXYZ2(GIFPackedReg* r) // PACKED XYZ2: copies the position from the packed fields into m_v, then kicks the vertex
+{
+	m_v.XYZ.X = r->XYZ2.X;
+	m_v.XYZ.Y = r->XYZ2.Y;
+	m_v.XYZ.Z = r->XYZ2.Z;
+	// the packed ADC field is handed to VertexKick(); GIFRegHandlerXYZ2 passes false and GIFRegHandlerXYZ3 true instead
+	VertexKick(r->XYZ2.ADC);
+}
+
+template<int i> void GSState::GIFPackedRegHandlerTEX0(GIFPackedReg* r) // PACKED TEX0 of context i: the low 64 bits are treated as a plain register image
+{
+	GIFRegHandlerTEX0<i>((GIFReg*)&r->ai64[0]); // no repacking, forwarded as is
+}
+
+template<int i> void GSState::GIFPackedRegHandlerCLAMP(GIFPackedReg* r) // PACKED CLAMP of context i: the low 64 bits are treated as a plain register image
+{
+	GIFRegHandlerCLAMP<i>((GIFReg*)&r->ai64[0]); // no repacking, forwarded as is
+}
+
+void GSState::GIFPackedRegHandlerFOG(GIFPackedReg* r) // PACKED FOG: stores the fog coefficient in the pending vertex state m_v
+{
+	m_v.FOG.F = r->FOG.F;
+}
+
+void GSState::GIFPackedRegHandlerXYZF3(GIFPackedReg* r) // PACKED XYZF3: the low 64 bits are treated as a plain register image
+{
+	GIFRegHandlerXYZF3((GIFReg*)&r->ai64[0]); // no repacking, forwarded as is
+}
+
+void GSState::GIFPackedRegHandlerXYZ3(GIFPackedReg* r) // PACKED XYZ3: the low 64 bits are treated as a plain register image
+{
+	GIFRegHandlerXYZ3((GIFReg*)&r->ai64[0]); // no repacking, forwarded as is
+}
+
+// PACKED A+D: the ADDR field (truncated to a byte) selects the entry of m_fpGIFRegHandlers
+// that receives the embedded register data.
+void GSState::GIFPackedRegHandlerA_D(GIFPackedReg* r)
+{
+	const BYTE addr = (BYTE)r->A_D.ADDR;
+
+	(this->*m_fpGIFRegHandlers[addr])(&r->r);
+}
+
+void GSState::GIFPackedRegHandlerNOP(GIFPackedReg* r) // PACKED NOP: intentionally does nothing
+{
+}
+
+// GIFRegHandler*
+
+void GSState::GIFRegHandlerNull(GIFReg* r) // Default entry of m_fpGIFRegHandlers (see ResetHandlers): ignores the write
+{
+	// ASSERT(0);
+}
+
+// PRIM write: flushes pending work if the register changes, stores it (mirroring the primitive
+// type into PRMODE._PRIM), re-selects the context when PRMODECONT.AC is set and restarts
+// primitive assembly through ResetPrim().
+void GSState::GIFRegHandlerPRIM(GIFReg* r)
+{
+	// ASSERT(r->PRIM.PRIM < 7);
+
+	const bool changed = m_env.PRIM.i64 != r->PRIM.i64;
+
+	if(changed) Flush();
+
+	m_env.PRIM = r->PRIM;
+	m_env.PRMODE._PRIM = r->PRIM.PRIM;
+
+	// with AC clear the context is not taken from the PRIM register
+	if(m_env.PRMODECONT.AC)
+	{
+		m_context = &m_env.CTXT[m_env.PRIM.CTXT];
+	}
+
+	ResetPrim();
+}
+
+void GSState::GIFRegHandlerRGBAQ(GIFReg* r) // RGBAQ write: stored in the pending vertex state m_v, no flush involved
+{
+	m_v.RGBAQ = r->RGBAQ;
+}
+
+void GSState::GIFRegHandlerST(GIFReg* r) // ST write: stored in the pending vertex state m_v, no flush involved
+{
+	m_v.ST = r->ST;
+}
+
+void GSState::GIFRegHandlerUV(GIFReg* r) // UV write: stored in the pending vertex state m_v, no flush involved
+{
+	m_v.UV = r->UV;
+}
+
+void GSState::GIFRegHandlerXYZF2(GIFReg* r) // XYZF2 write: splits the register into position and fog, then kicks the vertex with VertexKick(false)
+{
+/*
+	m_v.XYZ.X = r->XYZF.X;
+	m_v.XYZ.Y = r->XYZF.Y;
+	m_v.XYZ.Z = r->XYZF.Z;
+	m_v.FOG.F = r->XYZF.F;
+*/
+	m_v.XYZ.ai32[0] = r->XYZF.ai32[0]; // dword-wise equivalent of the field copies commented out above
+	m_v.XYZ.ai32[1] = r->XYZF.ai32[1] & 0x00ffffff; // low 24 bits of the upper dword go to XYZ
+	m_v.FOG.ai32[1] = r->XYZF.ai32[1] & 0xff000000; // top 8 bits of the upper dword go to FOG
+
+	VertexKick(false);
+}
+
+void GSState::GIFRegHandlerXYZ2(GIFReg* r) // XYZ2 write: stores the position in m_v, then kicks the vertex with VertexKick(false)
+{
+	m_v.XYZ = r->XYZ;
+
+	VertexKick(false);
+}
+
+// TEX0 write for context i: flushes when the active context's TEX0 changes or when the write
+// would reload a dirty CLUT, stores the register with TW/TH/CPSM sanitized, refreshes the
+// texture format table pointer and finally lets the local memory load the CLUT.
+template<int i> void GSState::GIFRegHandlerTEX0(GIFReg* r)
+{
+	// even if TEX0 did not change, a new palette may have been uploaded and will overwrite the currently queued for drawing
+
+	const bool activeChanged = PRIM->CTXT == i && m_env.CTXT[i].TEX0.i64 != r->TEX0.i64;
+
+	// IsCLUTDirty() is only consulted when the first test fails and CLD is in the range 1..3
+	if(activeChanged || (r->TEX0.CLD >= 1 && r->TEX0.CLD <= 3 && m_mem.IsCLUTDirty(r->TEX0, m_env.TEXCLUT)))
+	{
+		Flush();
+	}
+
+	m_env.CTXT[i].TEX0 = r->TEX0;
+
+	// ASSERT(m_env.CTXT[i].TEX0.TW <= 10 && m_env.CTXT[i].TEX0.TH <= 10 && (m_env.CTXT[i].TEX0.CPSM & ~0xa) == 0);
+
+	// clamp out of range size exponents instead of asserting
+	if(m_env.CTXT[i].TEX0.TW > 10)
+	{
+		m_env.CTXT[i].TEX0.TW = 10;
+	}
+
+	if(m_env.CTXT[i].TEX0.TH > 10)
+	{
+		m_env.CTXT[i].TEX0.TH = 10;
+	}
+
+	m_env.CTXT[i].TEX0.CPSM &= 0xa; // 1010b
+
+	m_env.CTXT[i].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[i].TEX0.PSM];
+
+	FlushWrite();
+
+	// note: the CLUT load uses the register as written, not the sanitized copy
+	m_mem.WriteCLUT(r->TEX0, m_env.TEXCLUT);
+}
+
+// CLAMP write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerCLAMP(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].CLAMP.i64 != r->CLAMP.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].CLAMP = r->CLAMP;
+}
+
+void GSState::GIFRegHandlerFOG(GIFReg* r) // FOG write: stored in the pending vertex state m_v, no flush involved
+{
+	m_v.FOG = r->FOG;
+}
+
+void GSState::GIFRegHandlerXYZF3(GIFReg* r) // XYZF3 write: same split as XYZF2, but kicks the vertex with VertexKick(true)
+{
+/*
+	m_v.XYZ.X = r->XYZF.X;
+	m_v.XYZ.Y = r->XYZF.Y;
+	m_v.XYZ.Z = r->XYZF.Z;
+	m_v.FOG.F = r->XYZF.F;
+*/
+	m_v.XYZ.ai32[0] = r->XYZF.ai32[0]; // dword-wise equivalent of the field copies commented out above
+	m_v.XYZ.ai32[1] = r->XYZF.ai32[1] & 0x00ffffff; // low 24 bits of the upper dword go to XYZ
+	m_v.FOG.ai32[1] = r->XYZF.ai32[1] & 0xff000000; // top 8 bits of the upper dword go to FOG
+
+	VertexKick(true);
+}
+
+void GSState::GIFRegHandlerXYZ3(GIFReg* r) // XYZ3 write: stores the position in m_v, then kicks the vertex with VertexKick(true)
+{
+	m_v.XYZ = r->XYZ;
+
+	VertexKick(true);
+}
+
+void GSState::GIFRegHandlerNOP(GIFReg* r) // NOP write: intentionally does nothing
+{
+}
+
+// TEX1 write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerTEX1(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].TEX1.i64 != r->TEX1.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].TEX1 = r->TEX1;
+}
+
+// TEX2 write for context i: merges the bits covered by the TEX2 mask into the current TEX0
+// value (overwriting *r with the merged result) and processes that as a TEX0 write.
+template<int i> void GSState::GIFRegHandlerTEX2(GIFReg* r)
+{
+	// m_env.CTXT[i].TEX2 = r->TEX2; // not used
+
+	const UINT64 tex2bits = 0xFFFFFFE003F00000ui64; // TEX2 bits
+
+	const UINT64 incoming = r->i64 & tex2bits;
+	const UINT64 retained = m_env.CTXT[i].TEX0.i64 & ~tex2bits;
+
+	r->i64 = incoming | retained;
+
+	GIFRegHandlerTEX0<i>(r);
+}
+
+// XYOFFSET write for context i: flushes whenever the value changes (regardless of which
+// context is active), stores it and lets the context recompute its scissor.
+template<int i> void GSState::GIFRegHandlerXYOFFSET(GIFReg* r)
+{
+	const bool changed = m_env.CTXT[i].XYOFFSET.i64 != r->XYOFFSET.i64;
+
+	if(changed) Flush();
+
+	m_env.CTXT[i].XYOFFSET = r->XYOFFSET;
+
+	m_env.CTXT[i].UpdateScissor();
+}
+
+// PRMODECONT write: flushes on change, stores the register and re-points PRIM at either
+// m_env.PRIM (AC set) or m_env.PRMODE (AC clear); the active context follows the new PRIM.
+void GSState::GIFRegHandlerPRMODECONT(GIFReg* r)
+{
+	const bool changed = m_env.PRMODECONT.i64 != r->PRMODECONT.i64;
+
+	if(changed) Flush();
+
+	m_env.PRMODECONT = r->PRMODECONT;
+
+	if(m_env.PRMODECONT.AC)
+	{
+		PRIM = &m_env.PRIM;
+	}
+	else
+	{
+		PRIM = (GIFRegPRIM*)&m_env.PRMODE;
+	}
+
+	ASSERT(PRIM->PRIM < 7);
+
+	m_context = &m_env.CTXT[PRIM->CTXT];
+}
+
+// PRMODE write: flushes when PRMODE is the register in effect (AC clear), stores the new value
+// while preserving the _PRIM field mirrored from PRIM, then re-selects the active context.
+void GSState::GIFRegHandlerPRMODE(GIFReg* r)
+{
+	if(!m_env.PRMODECONT.AC)
+	{
+		Flush();
+	}
+
+	// _PRIM is maintained by GIFRegHandlerPRIM and must survive the assignment below
+	const UINT32 savedPrim = m_env.PRMODE._PRIM;
+
+	m_env.PRMODE = r->PRMODE;
+	m_env.PRMODE._PRIM = savedPrim;
+
+	m_context = &m_env.CTXT[PRIM->CTXT];
+}
+
+// TEXCLUT write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerTEXCLUT(GIFReg* r)
+{
+	const bool changed = m_env.TEXCLUT.i64 != r->TEXCLUT.i64;
+
+	if(changed) Flush();
+
+	m_env.TEXCLUT = r->TEXCLUT;
+}
+
+// SCANMSK write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerSCANMSK(GIFReg* r)
+{
+	const bool changed = m_env.SCANMSK.i64 != r->SCANMSK.i64;
+
+	if(changed) Flush();
+
+	m_env.SCANMSK = r->SCANMSK;
+}
+
+// MIPTBP1 write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerMIPTBP1(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].MIPTBP1.i64 != r->MIPTBP1.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].MIPTBP1 = r->MIPTBP1;
+}
+
+// MIPTBP2 write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerMIPTBP2(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].MIPTBP2.i64 != r->MIPTBP2.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].MIPTBP2 = r->MIPTBP2;
+}
+
+// TEXA write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerTEXA(GIFReg* r)
+{
+	const bool changed = m_env.TEXA.i64 != r->TEXA.i64;
+
+	if(changed) Flush();
+
+	m_env.TEXA = r->TEXA;
+}
+
+// FOGCOL write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerFOGCOL(GIFReg* r)
+{
+	const bool changed = m_env.FOGCOL.i64 != r->FOGCOL.i64;
+
+	if(changed) Flush();
+
+	m_env.FOGCOL = r->FOGCOL;
+}
+
+void GSState::GIFRegHandlerTEXFLUSH(GIFReg* r) // TEXFLUSH write: currently ignored, nothing is flushed or invalidated here
+{
+	// what should we do here?
+}
+
+// SCISSOR write for context i: flushes only if i is the active context and the value changes,
+// stores the register and lets the context recompute its scissor.
+template<int i> void GSState::GIFRegHandlerSCISSOR(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].SCISSOR.i64 != r->SCISSOR.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].SCISSOR = r->SCISSOR;
+
+	m_env.CTXT[i].UpdateScissor();
+}
+
+// ALPHA write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerALPHA(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].ALPHA.i64 != r->ALPHA.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].ALPHA = r->ALPHA;
+}
+
+// DIMX write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerDIMX(GIFReg* r)
+{
+	const bool changed = m_env.DIMX.i64 != r->DIMX.i64;
+
+	if(changed) Flush();
+
+	m_env.DIMX = r->DIMX;
+}
+
+// DTHE write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerDTHE(GIFReg* r)
+{
+	const bool changed = m_env.DTHE.i64 != r->DTHE.i64;
+
+	if(changed) Flush();
+
+	m_env.DTHE = r->DTHE;
+}
+
+// COLCLAMP write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerCOLCLAMP(GIFReg* r)
+{
+	const bool changed = m_env.COLCLAMP.i64 != r->COLCLAMP.i64;
+
+	if(changed) Flush();
+
+	m_env.COLCLAMP = r->COLCLAMP;
+}
+
+// TEST write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerTEST(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].TEST.i64 != r->TEST.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].TEST = r->TEST;
+}
+
+// PABE write: flushes pending work if the value changes, then stores it.
+void GSState::GIFRegHandlerPABE(GIFReg* r)
+{
+	const bool changed = m_env.PABE.i64 != r->PABE.i64;
+
+	if(changed) Flush();
+
+	m_env.PABE = r->PABE;
+}
+
+// FBA write for context i: flushes only if i is the active context and the value changes.
+template<int i> void GSState::GIFRegHandlerFBA(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].FBA.i64 != r->FBA.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].FBA = r->FBA;
+}
+
+// FRAME write for context i: flushes only if i is the active context and the value changes,
+// stores the register and re-points the frame format table at the entry for the new PSM.
+template<int i> void GSState::GIFRegHandlerFRAME(GIFReg* r)
+{
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].FRAME.i64 != r->FRAME.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].FRAME = r->FRAME;
+
+	m_env.CTXT[i].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[i].FRAME.PSM];
+}
+
+// ZBUF write for context i: forces the format into the 0x30 group (modifying *r), flushes if
+// i is the active context and the value changes, stores the register, falls back to PSMZ32
+// for anything that is not one of the four Z formats and refreshes the Z format table pointer.
+template<int i> void GSState::GIFRegHandlerZBUF(GIFReg* r)
+{
+	r->ZBUF.PSM |= 0x30;
+
+	const bool dirty = PRIM->CTXT == i && m_env.CTXT[i].ZBUF.i64 != r->ZBUF.i64;
+
+	if(dirty) Flush();
+
+	m_env.CTXT[i].ZBUF = r->ZBUF;
+
+	switch(m_env.CTXT[i].ZBUF.PSM)
+	{
+	case PSM_PSMZ32:
+	case PSM_PSMZ24:
+	case PSM_PSMZ16:
+	case PSM_PSMZ16S:
+		// valid Z format, keep it
+		break;
+	default:
+		m_env.CTXT[i].ZBUF.PSM = PSM_PSMZ32;
+		break;
+	}
+
+	m_env.CTXT[i].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[i].ZBUF.PSM];
+}
+
+// BITBLTBUF write: commits buffered transfer data first if the value changes, then stores it.
+void GSState::GIFRegHandlerBITBLTBUF(GIFReg* r)
+{
+	const bool changed = m_env.BITBLTBUF.i64 != r->BITBLTBUF.i64;
+
+	if(changed) FlushWrite();
+
+	m_env.BITBLTBUF = r->BITBLTBUF;
+}
+
+// TRXPOS write: commits buffered transfer data first if the value changes, then stores it.
+void GSState::GIFRegHandlerTRXPOS(GIFReg* r)
+{
+	const bool changed = m_env.TRXPOS.i64 != r->TRXPOS.i64;
+
+	if(changed) FlushWrite();
+
+	m_env.TRXPOS = r->TRXPOS;
+}
+
+// TRXREG write: commits buffered transfer data unless both the working copy (TRXREG) and the
+// pristine copy (TRXREG2) already hold the new value, then stores the value in both.
+void GSState::GIFRegHandlerTRXREG(GIFReg* r)
+{
+	const bool unchanged = m_env.TRXREG.i64 == r->TRXREG.i64 && m_env.TRXREG2.i64 == r->TRXREG.i64;
+
+	if(!unchanged)
+	{
+		FlushWrite();
+	}
+
+	// TRXREG is rewritten by GIFRegHandlerTRXDIR, TRXREG2 keeps the value as written
+	m_env.TRXREG2 = r->TRXREG;
+	m_env.TRXREG = m_env.TRXREG2;
+}
+
+// TRXDIR write: flushes everything pending, stores the register and starts the transfer.
+// For host -> local (0) and local -> host (1) the transfer cursor (m_x, m_y) is placed on the
+// start position and TRXREG.RRW/RRH are turned into the right/bottom edge (start + size taken
+// from TRXREG2). Local -> local (2) is carried out immediately by Move(). 3 is not expected.
+void GSState::GIFRegHandlerTRXDIR(GIFReg* r)
+{
+	Flush();
+
+	m_env.TRXDIR = r->TRXDIR;
+
+	const int dir = m_env.TRXDIR.XDIR;
+
+	if(dir == 0 || dir == 1)
+	{
+		if(dir == 0) // host -> local
+		{
+			m_x = m_env.TRXPOS.DSAX;
+			m_y = m_env.TRXPOS.DSAY;
+		}
+		else // local -> host
+		{
+			m_x = m_env.TRXPOS.SSAX;
+			m_y = m_env.TRXPOS.SSAY;
+		}
+
+		m_env.TRXREG.RRW = m_x + m_env.TRXREG2.RRW;
+		m_env.TRXREG.RRH = m_y + m_env.TRXREG2.RRH;
+	}
+	else if(dir == 2) // local -> local
+	{
+		Move();
+	}
+	else if(dir == 3)
+	{
+		ASSERT(0);
+	}
+}
+
+void GSState::GIFRegHandlerHWREG(GIFReg* r) // HWREG write: not implemented, the data is dropped
+{
+	// TODO
+
+	ASSERT(0); // only reports the unimplemented write when ASSERT is active
+}
+
+// SIGNAL write (ignored when m_mt is set): merges the masked ID into SIGLBLID->SIGID, raises
+// CSR->rSIGNAL if CSR->wSIGNAL is set and calls the irq callback unless IMR->SIGMSK masks it.
+void GSState::GIFRegHandlerSIGNAL(GIFReg* r)
+{
+	if(m_mt) return;
+
+	// only the bits selected by IDMSK are replaced
+	SIGLBLID->SIGID = (SIGLBLID->SIGID & ~r->SIGNAL.IDMSK) | (r->SIGNAL.ID & r->SIGNAL.IDMSK);
+
+	if(CSR->wSIGNAL)
+	{
+		CSR->rSIGNAL = 1;
+	}
+
+	if(!IMR->SIGMSK && m_irq)
+	{
+		m_irq();
+	}
+}
+
+// FINISH write (ignored when m_mt is set): raises CSR->rFINISH if CSR->wFINISH is set and
+// calls the irq callback unless IMR->FINISHMSK masks it.
+void GSState::GIFRegHandlerFINISH(GIFReg* r)
+{
+	if(m_mt) return;
+
+	if(CSR->wFINISH)
+	{
+		CSR->rFINISH = 1;
+	}
+
+	if(!IMR->FINISHMSK && m_irq)
+	{
+		m_irq();
+	}
+}
+
+void GSState::GIFRegHandlerLABEL(GIFReg* r) // LABEL write: merges the masked ID into SIGLBLID->LBLID
+{
+	if(m_mt) return; // ignored when m_mt is set, like SIGNAL and FINISH
+	// only the bits selected by IDMSK are replaced, the others keep their previous value
+	SIGLBLID->LBLID = (SIGLBLID->LBLID & ~r->LABEL.IDMSK) | (r->LABEL.ID & r->LABEL.IDMSK);
+}
+
+//
+
+void GSState::Flush() // Commits everything pending, in this order: buffered transfer data, then the primitives handled by FlushPrim()
+{
+	FlushWrite();
+
+	FlushPrim();
+}
+
+void GSState::FlushWrite() // Commits the transfer data buffered in m_buff by Write() and empties the buffer
+{
+	FlushWrite(m_buff, m_bytes); // does nothing when m_bytes is 0
+
+	m_bytes = 0;
+}
+
+// Writes len bytes of host -> local transfer data into local memory using the swizzle routine
+// of the destination format, then invalidates the touched texture area and the CLUT.
+// Does nothing when len is not positive.
+void GSState::FlushWrite(BYTE* mem, int len)
+{
+	if(len <= 0) return;
+
+	// row the transfer cursor is on before this chunk is written
+	const int top = m_y;
+
+	GSLocalMemory::SwizzleTexture swizzle = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].st;
+
+	// presumably advances m_x/m_y past the written data - the rectangle below relies on the updated values
+	(m_mem.*swizzle)(m_x, m_y, mem, len, m_env.BITBLTBUF, m_env.TRXPOS, m_env.TRXREG);
+
+	// TODO: m_perfmon.Put(GSPerfMon::Swizzle, len);
+
+	//ASSERT(m_env.TRXREG.RRH >= m_y - top);
+
+	CRect dirty;
+
+	dirty.left = m_env.TRXPOS.DSAX;
+	dirty.top = top;
+	dirty.right = m_env.TRXREG.RRW;
+	// a cursor sitting at the left edge means the last row was completed, otherwise the partial row counts too
+	dirty.bottom = min(m_x == m_env.TRXPOS.DSAX ? m_y : m_y + 1, m_env.TRXREG.RRH);
+
+	InvalidateTexture(m_env.BITBLTBUF, dirty);
+
+	m_mem.InvalidateCLUT();
+}
+
+//
+
+void GSState::Write(BYTE* mem, int len) // Accepts len bytes of host -> local transfer data: a complete transfer is written at once, partial data is collected in m_buff
+{
+	if(len == 0) return;
+
+	// TODO: hmmmm
+	// queued primitives are flushed first when texturing is on and the destination is the base of the active texture or CLUT
+	if(PRIM->TME && (m_env.BITBLTBUF.DBP == m_context->TEX0.TBP0 || m_env.BITBLTBUF.DBP == m_context->TEX0.CBP))
+	{
+		FlushPrim();
+	}
+
+	int bpp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].trbpp;
+	// bytes per row of the transfer rectangle (TRXREG.RRW holds the right edge here, see GIFRegHandlerTRXDIR)
+	int pitch = (m_env.TRXREG.RRW - m_env.TRXPOS.DSAX) * bpp >> 3;
+
+	if(pitch <= 0) {ASSERT(0); return;}
+
+	int height = len / pitch; // number of whole rows contained in the data
+	// data beyond the bottom edge of the transfer rectangle is dropped
+	if(height > m_env.TRXREG.RRH - m_env.TRXPOS.DSAY)
+	{
+		height = m_env.TRXREG.RRH - m_env.TRXPOS.DSAY;
+
+		len = height * pitch;
+	}
+	// buffer when data is already pending or this chunk does not cover the whole rectangle
+	if(m_bytes > 0 || height < m_env.TRXREG.RRH - m_env.TRXPOS.DSAY)
+	{
+		ASSERT(len <= m_maxbytes); // more than 4mb into a 4mb local mem doesn't make sense
+
+		len = min(m_maxbytes, len);
+
+		if(m_bytes + len > m_maxbytes) // make room by committing what has been collected so far
+		{
+			FlushWrite();
+		}
+
+		memcpy(&m_buff[m_bytes], mem, len);
+
+		m_bytes += len;
+	}
+	else
+	{
+		FlushWrite(mem, len); // complete transfer: written straight from the caller's memory
+	}
+}
+
+void GSState::Read(BYTE* mem, int len) // Local -> host transfer: fills mem with len bytes read pixel by pixel from the source buffer, continuing at the transfer cursor (m_x, m_y)
+{
+	BYTE* pb = (BYTE*)mem; // the same destination viewed as bytes, words and dwords
+	WORD* pw = (WORD*)mem;
+	DWORD* pd = (DWORD*)mem;
+
+	if(m_y >= (int)m_env.TRXREG.RRH) {ASSERT(0); return;} // cursor already past the bottom edge: nothing left to read
+
+	if(m_x == m_env.TRXPOS.SSAX && m_y == m_env.TRXPOS.SSAY) // cursor still on the start position, i.e. first chunk of the transfer
+	{
+		CRect r(m_env.TRXPOS.SSAX, m_env.TRXPOS.SSAY, m_env.TRXREG.RRW, m_env.TRXREG.RRH);
+
+		InvalidateLocalMem(m_env.BITBLTBUF, r);
+	}
+	// one loop per source format; len is converted into a pixel (or byte) count and StepTransfer() is called once per pixel read
+	switch(m_env.BITBLTBUF.SPSM)
+	{
+	case PSM_PSMCT32:
+		for(len /= 4; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pd++)
+			*pd = m_mem.readPixel32(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMCT24:
+		for(len /= 3; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb+=3)
+		{
+			DWORD dw = m_mem.readPixel24(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+			pb[0] = ((BYTE*)&dw)[0]; pb[1] = ((BYTE*)&dw)[1]; pb[2] = ((BYTE*)&dw)[2]; // only three bytes per pixel are stored
+		}
+		break;
+	case PSM_PSMCT16:
+		for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++)
+			*pw = (WORD)m_mem.readPixel16(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMCT16S:
+		for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++)
+			*pw = (WORD)m_mem.readPixel16S(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMT8:
+		for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++)
+			*pb = (BYTE)m_mem.readPixel8(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMT4: // two 4-bit pixels per output byte: the pixel at m_x in the low nibble, the one at m_x+1 in the high nibble
+		for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++)
+			*pb = (BYTE)(m_mem.readPixel4(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)&0x0f)
+				| (BYTE)(m_mem.readPixel4(m_x+1, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)<<4);
+		break;
+	case PSM_PSMT8H:
+		for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++)
+			*pb = (BYTE)m_mem.readPixel8H(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMT4HL: // packed like PSM_PSMT4
+		for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++)
+			*pb = (BYTE)(m_mem.readPixel4HL(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)&0x0f)
+				| (BYTE)(m_mem.readPixel4HL(m_x+1, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)<<4);
+		break;
+	case PSM_PSMT4HH: // packed like PSM_PSMT4
+		for(; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb++)
+			*pb = (BYTE)(m_mem.readPixel4HH(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)&0x0f)
+				| (BYTE)(m_mem.readPixel4HH(m_x+1, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW)<<4);
+		break;
+	case PSM_PSMZ32:
+		for(len /= 4; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pd++)
+			*pd = m_mem.readPixel32Z(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMZ24:
+		for(len /= 3; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pb+=3)
+		{
+			DWORD dw = m_mem.readPixel24Z(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+			pb[0] = ((BYTE*)&dw)[0]; pb[1] = ((BYTE*)&dw)[1]; pb[2] = ((BYTE*)&dw)[2]; // only three bytes per pixel are stored
+		}
+		break;
+	case PSM_PSMZ16:
+		for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++)
+			*pw = (WORD)m_mem.readPixel16Z(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	case PSM_PSMZ16S:
+		for(len /= 2; len-- > 0; StepTransfer(m_env.TRXPOS.SSAX, m_env.TRXREG.RRW), pw++)
+			*pw = (WORD)m_mem.readPixel16SZ(m_x, m_y, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW);
+		break;
+	} // any other SPSM value leaves mem untouched
+}
+
+void GSState::Move() // local->local transfer: copies the RRW x RRH pixel rectangle at (SSAX, SSAY) to (DSAX, DSAY)
+{
+	// ffxii uses this to move the top/bottom of the scrolling menus offscreen and then blends them back over the text to create a shading effect
+	// guitar hero copies the far end of the board to do a similar blend too
+
+	GSLocalMemory::readPixel rp = GSLocalMemory::m_psm[m_env.BITBLTBUF.SPSM].rp;
+	GSLocalMemory::writePixel wp = GSLocalMemory::m_psm[m_env.BITBLTBUF.DPSM].wp;
+
+	int sx = m_env.TRXPOS.SSAX, sy = m_env.TRXPOS.SSAY;
+	int dx = m_env.TRXPOS.DSAX, dy = m_env.TRXPOS.DSAY;
+	int w = m_env.TRXREG.RRW, h = m_env.TRXREG.RRH;
+
+	// Fix: report the real, top-left based rectangles. The hooks used to be called after sx/sy/dx/dy
+	// had been moved to the far edge for a reverse copy, which shifted both rectangles by w-1 / h-1.
+	InvalidateLocalMem(m_env.BITBLTBUF, CRect(CPoint(sx, sy), CSize(w, h)));
+	InvalidateTexture(m_env.BITBLTBUF, CRect(CPoint(dx, dy), CSize(w, h)));
+
+	// When the destination lies after the source on an axis, walk that axis backwards (memmove-style)
+	// so that overlapping source pixels are read before they are overwritten.
+	int xinc = 1, yinc = 1;
+	if(sx < dx) sx += w-1, dx += w-1, xinc = -1;
+	if(sy < dy) sy += h-1, dy += h-1, yinc = -1;
+
+	for(int y = 0; y < h; y++, sy += yinc, dy += yinc, sx -= xinc*w, dx -= xinc*w) // the last two terms rewind x to the row start
+		for(int x = 0; x < w; x++, sx += xinc, dx += xinc)
+			(m_mem.*wp)(dx, dy, (m_mem.*rp)(sx, sy, m_env.BITBLTBUF.SBP, m_env.BITBLTBUF.SBW), m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW);
+}
+
+void GSState::WriteCSR(UINT32 csr) // stores csr into element 1 of the CSR register's ai32 array
+{
+	CSR->ai32[1] = csr; // element 0 is left untouched; NOTE(review): presumably ai32 views the register as 32-bit words - confirm in GS.h
+}
+
+void GSState::ReadFIFO(BYTE* mem, int size) // reads size*16 bytes of local memory into mem via Read()
+{
+	Flush(); // virtual hook, called before any pixel is read; NOTE(review): presumably drains pending writes/primitives - confirm
+	// size is scaled by 16 to get the byte count (presumably size counts 128-bit quadwords - confirm with the caller)
+	Read(mem, size * 16);
+}
+
+void GSState::Transfer(BYTE* mem, int size, int index) // parses GIF packets from mem for path m_path[index] and dispatches their contents
+{
+	GIFPath& path = m_path[index];
+	// size is consumed one unit per GIFTag / GIFPackedReg and is 16 bytes per unit in the IMAGE case (presumably quadwords throughout)
+	while(size > 0)
+	{
+		bool eop = false;
+		// NLOOP == 0: the previous tag is exhausted, so the next unit in mem is a new GIFTag
+		if(path.tag.NLOOP == 0)
+		{
+			path.tag = *(GIFTag*)mem;
+			path.nreg = 0;
+
+			mem += sizeof(GIFTag);
+			size--;
+
+			m_q = 1.0f; // m_q is reset for every new tag
+
+			if(index == 2 && path.tag.EOP)
+			{
+				m_path3hack = 1; // latched until GetLastTag() reads and clears it
+			}
+			// PRE set: feed the tag's PRIM field through the regular PRIM register handler
+			if(path.tag.PRE)
+			{
+				GIFReg r;
+				r.i64 = path.tag.PRIM;
+				(this->*m_fpGIFRegHandlers[GIF_A_D_REG_PRIM])(&r);
+			}
+
+			if(path.tag.EOP)
+			{
+				eop = true;
+			}
+			else if(path.tag.NLOOP == 0) // tag without data and without EOP
+			{
+				if(index == 0 && m_nloophack)
+				{
+					continue; // hack enabled on path 0: go straight on to the next tag
+				}
+				// otherwise an empty tag is treated like an end-of-packet
+				eop = true;
+			}
+		}
+		// consume as much of the current tag's data as size allows, according to the tag's data format
+		switch(path.tag.FLG)
+		{
+		case GIF_FLG_PACKED:
+			// one GIFPackedReg per unit; the handler is selected by path.GetGIFReg()
+			for(GIFPackedReg* r = (GIFPackedReg*)mem; path.tag.NLOOP > 0 && size > 0; r++, size--, mem += sizeof(GIFPackedReg))
+			{
+				(this->*m_fpGIFPackedRegHandlers[path.GetGIFReg()])(r);
+				// nreg counts modulo 16; reaching NREG completes one loop of the tag
+				if((path.nreg = (path.nreg + 1) & 0xf) == path.tag.NREG) 
+				{
+					path.nreg = 0; 
+					path.tag.NLOOP--;
+				}
+			}
+
+			break;
+
+		case GIF_FLG_REGLIST:
+			// two GIFReg entries per unit, so size is counted in GIFRegs while this case runs
+			size *= 2;
+
+			for(GIFReg* r = (GIFReg*)mem; path.tag.NLOOP > 0 && size > 0; r++, size--, mem += sizeof(GIFReg))
+			{
+				(this->*m_fpGIFRegHandlers[path.GetGIFReg()])(r);
+
+				if((path.nreg = (path.nreg + 1) & 0xf) == path.tag.NREG)
+				{
+					path.nreg = 0; 
+					path.tag.NLOOP--;
+				}
+			}
+			// odd number of GIFRegs left: skip one more so mem is back on a unit boundary (presumably a padding register)
+			if(size & 1) mem += sizeof(GIFReg);
+
+			size /= 2;
+			
+			break;
+
+		case GIF_FLG_IMAGE2: // hmmm
+			// the data of this tag is discarded: the tag is simply marked as finished
+			path.tag.NLOOP = 0;
+
+			break;
+
+		case GIF_FLG_IMAGE:
+			{
+				int len = min(size, path.tag.NLOOP);
+
+				//ASSERT(!(len&3));
+				// TRXDIR.XDIR selects the transfer direction; len units are len*16 bytes
+				switch(m_env.TRXDIR.XDIR)
+				{
+				case 0:
+					Write(mem, len*16);
+					break;
+				case 1: 
+					Read(mem, len*16); // TODO: writing access violation with aqtime
+					break;
+				case 2: 
+					Move(); // takes no data from mem, but mem/size still advance by len below
+					break;
+				case 3: 
+					ASSERT(0);
+					break;
+				default: 
+					__assume(0);
+				}
+
+				mem += len*16;
+				path.tag.NLOOP -= len;
+				size -= len;
+			}
+
+			break;
+
+		default: 
+			__assume(0);
+		}
+		// stop after an end-of-packet tag: always on path 0, on the other paths only once the input is used up
+		if(eop && ((int)size <= 0 || index == 0))
+		{
+			break;
+		}
+	}
+
+	// FIXME: dq8, pcsx2 error probably
+	// path 0 only: a tag left unfinished (no EOP, NLOOP > 0) is dropped instead of being continued by the next call
+	if(index == 0)
+	{
+		if(!path.tag.EOP && path.tag.NLOOP > 0)
+		{
+			path.tag.NLOOP = 0;
+
+			TRACE(_T("path1 hack\n"));
+		}
+	}
+}
+
+int GSState::Freeze(freezeData* fd, bool sizeonly) // serializes the GS state into fd; returns 0 on success, -1 if fd is unusable
+{
+	if(!fd) return -1; // same null guard as Defrost(); fd was previously dereferenced unconditionally
+
+	if(sizeonly)
+	{
+		fd->size = m_sssize; // size query: report the required buffer size and write nothing else
+		return 0;
+	}
+
+	if(!fd->data || fd->size < m_sssize) return -1; // no buffer, or buffer too small for a full state
+
+	Flush();
+
+	// Field order and sizes below define the save-state layout; Defrost() reads them back in the same order.
+	BYTE* data = fd->data;
+
+	memcpy(data, &m_version, sizeof(m_version)); data += sizeof(m_version);
+	memcpy(data, &m_env, sizeof(m_env)); data += sizeof(m_env);
+	memcpy(data, &m_v, sizeof(m_v)); data += sizeof(m_v);
+	memcpy(data, &m_x, sizeof(m_x)); data += sizeof(m_x);
+	memcpy(data, &m_y, sizeof(m_y)); data += sizeof(m_y);
+	memcpy(data, m_mem.GetVM(), m_vmsize); data += m_vmsize;
+	memcpy(data, m_path, sizeof(m_path)); data += sizeof(m_path);
+	memcpy(data, &m_q, sizeof(m_q)); data += sizeof(m_q);
+
+	return 0;
+}
+
+int GSState::Defrost(const freezeData* fd) // restores the state written by Freeze(); returns 0 on success, -1 if fd is rejected
+{
+	if(!fd || !fd->data || fd->size == 0)
+	{
+		return -1;
+	}
+	// Fix: validate against the full save-state size (m_sssize), exactly as Freeze() does. The old test (fd->size != m_vmsize)
+	// compared against the size of the video memory image alone, although more than m_vmsize bytes are read below.
+	if(fd->size < m_sssize) return -1;
+
+	BYTE* data = fd->data;
+
+	if(*(int*)data != m_version) // the blob starts with the m_version it was written with
+	{
+		return -1;
+	}
+
+	data += sizeof(m_version);
+
+	Flush();
+	// Same field order and sizes as the memcpy sequence in Freeze().
+	memcpy(&m_env, data, sizeof(m_env)); data += sizeof(m_env);
+	memcpy(&m_v, data, sizeof(m_v)); data += sizeof(m_v);
+	memcpy(&m_x, data, sizeof(m_x)); data += sizeof(m_x);
+	memcpy(&m_y, data, sizeof(m_y)); data += sizeof(m_y);
+	memcpy(m_mem.GetVM(), data, m_vmsize); data += m_vmsize;
+	memcpy(&m_path, data, sizeof(m_path)); data += sizeof(m_path);
+	memcpy(&m_q, data, sizeof(m_q)); data += sizeof(m_q);
+
+	// Re-derive the pointers that depend on the restored registers: PRMODE is used as PRIM while PRMODECONT.AC is 0.
+	PRIM = !m_env.PRMODECONT.AC ? (GIFRegPRIM*)&m_env.PRMODE : &m_env.PRIM;
+	m_context = &m_env.CTXT[PRIM->CTXT];
+
+	// ftbl/ztbl/ttbl are pointers stored inside m_env, so the values just copied in come from the saving
+	// process; rebuild them from the restored FRAME/ZBUF/TEX0 pixel formats of both contexts.
+	m_env.CTXT[0].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[0].FRAME.PSM];
+	m_env.CTXT[0].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[0].ZBUF.PSM];
+	m_env.CTXT[0].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[0].TEX0.PSM];
+
+	m_env.CTXT[1].ftbl = &GSLocalMemory::m_psm[m_env.CTXT[1].FRAME.PSM];
+	m_env.CTXT[1].ztbl = &GSLocalMemory::m_psm[m_env.CTXT[1].ZBUF.PSM];
+	m_env.CTXT[1].ttbl = &GSLocalMemory::m_psm[m_env.CTXT[1].TEX0.PSM];
+
+	return 0;
+}
+
diff --git a/gsdx/GSState.h b/gsdx/GSState.h
new file mode 100644
index 0000000..0208f81
--- /dev/null
+++ b/gsdx/GSState.h
@@ -0,0 +1,186 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GS.h"
+#include "GSLocalMemory.h"
+#include "GSDrawingContext.h"
+#include "GSDrawingEnvironment.h"
+#include "GSVertex.h"
+#include "GSVertexList.h"
+#include "GSUtil.h"
+#include "GSDirtyRect.h"
+#include "GSPerfMon.h"
+#include "GSScale.h"
+
+class GSState // abstract base (pure virtual FlushPrim/ResetPrim/VertexKick): GIF packet parsing, transfers and save states
+{
+	typedef void (GSState::*GIFPackedRegHandler)(GIFPackedReg* r);
+	// dispatch table for PACKED data in Transfer(), indexed by GIFPath::GetGIFReg()
+	GIFPackedRegHandler m_fpGIFPackedRegHandlers[16];
+	// handlers for packed registers; the <int i> templates are instantiated per index (presumably the drawing context - confirm)
+	void GIFPackedRegHandlerNull(GIFPackedReg* r);
+	void GIFPackedRegHandlerPRIM(GIFPackedReg* r);
+	void GIFPackedRegHandlerRGBA(GIFPackedReg* r);
+	void GIFPackedRegHandlerSTQ(GIFPackedReg* r);
+	void GIFPackedRegHandlerUV(GIFPackedReg* r);
+	void GIFPackedRegHandlerXYZF2(GIFPackedReg* r);
+	void GIFPackedRegHandlerXYZ2(GIFPackedReg* r);
+	template<int i> void GIFPackedRegHandlerTEX0(GIFPackedReg* r);
+	template<int i> void GIFPackedRegHandlerCLAMP(GIFPackedReg* r);
+	void GIFPackedRegHandlerFOG(GIFPackedReg* r);
+	void GIFPackedRegHandlerXYZF3(GIFPackedReg* r);
+	void GIFPackedRegHandlerXYZ3(GIFPackedReg* r);
+	void GIFPackedRegHandlerA_D(GIFPackedReg* r);
+	void GIFPackedRegHandlerNOP(GIFPackedReg* r);
+
+	typedef void (GSState::*GIFRegHandler)(GIFReg* r);
+	// dispatch table for REGLIST data and for the tag's PRIM field (GIF_A_D_REG_PRIM) in Transfer()
+	GIFRegHandler m_fpGIFRegHandlers[256];
+	// handlers for the 64-bit register format
+	void GIFRegHandlerNull(GIFReg* r);
+	void GIFRegHandlerPRIM(GIFReg* r);
+	void GIFRegHandlerRGBAQ(GIFReg* r);
+	void GIFRegHandlerST(GIFReg* r);
+	void GIFRegHandlerUV(GIFReg* r);
+	void GIFRegHandlerXYZF2(GIFReg* r);
+	void GIFRegHandlerXYZ2(GIFReg* r);
+	template<int i> void GIFRegHandlerTEX0(GIFReg* r);
+	template<int i> void GIFRegHandlerCLAMP(GIFReg* r);
+	void GIFRegHandlerFOG(GIFReg* r);
+	void GIFRegHandlerXYZF3(GIFReg* r);
+	void GIFRegHandlerXYZ3(GIFReg* r);
+	void GIFRegHandlerNOP(GIFReg* r);
+	template<int i> void GIFRegHandlerTEX1(GIFReg* r);
+	template<int i> void GIFRegHandlerTEX2(GIFReg* r);
+	template<int i> void GIFRegHandlerXYOFFSET(GIFReg* r);
+	void GIFRegHandlerPRMODECONT(GIFReg* r);
+	void GIFRegHandlerPRMODE(GIFReg* r);
+	void GIFRegHandlerTEXCLUT(GIFReg* r);
+	void GIFRegHandlerSCANMSK(GIFReg* r);
+	template<int i> void GIFRegHandlerMIPTBP1(GIFReg* r);
+	template<int i> void GIFRegHandlerMIPTBP2(GIFReg* r);
+	void GIFRegHandlerTEXA(GIFReg* r);
+	void GIFRegHandlerFOGCOL(GIFReg* r);
+	void GIFRegHandlerTEXFLUSH(GIFReg* r);
+	template<int i> void GIFRegHandlerSCISSOR(GIFReg* r);
+	template<int i> void GIFRegHandlerALPHA(GIFReg* r);
+	void GIFRegHandlerDIMX(GIFReg* r);
+	void GIFRegHandlerDTHE(GIFReg* r);
+	void GIFRegHandlerCOLCLAMP(GIFReg* r);
+	template<int i> void GIFRegHandlerTEST(GIFReg* r);
+	void GIFRegHandlerPABE(GIFReg* r);
+	template<int i> void GIFRegHandlerFBA(GIFReg* r);
+	template<int i> void GIFRegHandlerFRAME(GIFReg* r);
+	template<int i> void GIFRegHandlerZBUF(GIFReg* r);
+	void GIFRegHandlerBITBLTBUF(GIFReg* r);
+	void GIFRegHandlerTRXPOS(GIFReg* r);
+	void GIFRegHandlerTRXREG(GIFReg* r);
+	void GIFRegHandlerTRXDIR(GIFReg* r);
+	void GIFRegHandlerHWREG(GIFReg* r);
+	void GIFRegHandlerSIGNAL(GIFReg* r);
+	void GIFRegHandlerFINISH(GIFReg* r);
+	void GIFRegHandlerLABEL(GIFReg* r);
+	// save-state bookkeeping used by Freeze()/Defrost()
+	int m_version; // first field of a save state; Defrost() rejects blobs whose version differs
+	int m_vmsize; // number of bytes copied from/to m_mem.GetVM()
+	int m_sssize; // buffer size Freeze() reports and requires for a complete save state
+
+	bool m_mt; // NOTE(review): presumably the "mt" constructor argument (multithreading?) - confirm in GSState.cpp
+	void (*m_irq)(); // NOTE(review): presumably the "irq" callback passed to the constructor - confirm
+	bool m_path3hack; // set by Transfer() when a tag on path index 2 has EOP; returned and cleared by GetLastTag()
+	// transfer cursor and staging buffer of the image transfer functions
+	int m_x, m_y; // current pixel position of a Read()/Write() transfer, advanced by StepTransfer()
+	int m_bytes; // number of bytes currently held in m_buff
+	int m_maxbytes; // NOTE(review): presumably the capacity of m_buff - confirm in Write()
+	BYTE* m_buff; // staging buffer Write() appends incoming data to
+
+	void FlushWrite();
+	void FlushWrite(BYTE* mem, int len);
+	void StepTransfer(int sx, int ex) {if(++m_x == ex) {m_x = sx; m_y++;}} // next pixel; wraps to column sx of the next row when x reaches ex
+	// register pointers; NOTE(review): apart from PRIM (see Defrost()) presumably set up by the constructor from "base" - confirm
+public:
+	GIFRegPRIM*		PRIM; // Defrost() points this at m_env.PRMODE or m_env.PRIM depending on PRMODECONT.AC
+	GSRegPMODE*		PMODE;
+	GSRegSMODE1*	SMODE1;
+	GSRegSMODE2*	SMODE2;
+	GSRegDISPFB*	DISPFB[2];
+	GSRegDISPLAY*	DISPLAY[2];
+	GSRegEXTBUF*	EXTBUF;
+	GSRegEXTDATA*	EXTDATA;
+	GSRegEXTWRITE*	EXTWRITE;
+	GSRegBGCOLOR*	BGCOLOR;
+	GSRegCSR*		CSR; // written by WriteCSR()
+	GSRegIMR*		IMR;
+	GSRegBUSDIR*	BUSDIR;
+	GSRegSIGLBLID*	SIGLBLID;
+	// everything from here to m_q except m_context is part of the save state (see Freeze())
+	GIFPath m_path[3]; // one parser state per path index accepted by Transfer()
+	GSLocalMemory m_mem;
+	GSDrawingEnvironment m_env;
+	GSDrawingContext* m_context; // points at m_env.CTXT[PRIM->CTXT] after Defrost()
+	GSVertex m_v;
+	float m_q; // reset to 1.0f by Transfer() whenever a new GIF tag is read
+
+	bool m_nloophack; // makes Transfer() skip empty tags (NLOOP == 0, no EOP) on path index 0
+
+public:
+	GSState(BYTE* base, bool mt, void (*irq)(), bool nloophack);
+	virtual ~GSState();
+
+	void ResetHandlers();
+	// NOTE(review): the argument-less GetDisplayPos()/GetFramePos() return CSize while the (int i) overloads return CPoint
+	CPoint GetDisplayPos(int i);
+	CSize GetDisplaySize(int i);
+	CRect GetDisplayRect(int i);
+	CSize GetDisplayPos();
+	CSize GetDisplaySize();
+	CRect GetDisplayRect();
+	CPoint GetFramePos(int i);
+	CSize GetFrameSize(int i);
+	CRect GetFrameRect(int i);
+	CSize GetFramePos();
+	CSize GetFrameSize();
+	CRect GetFrameRect();
+	bool IsEnabled(int i);
+	int GetFPS();
+	// hooks for the concrete renderer; the two Invalidate* hooks do nothing by default
+	virtual void Reset();
+	virtual void Flush();
+	virtual void FlushPrim() = 0;
+	virtual void ResetPrim() = 0;
+	virtual void VertexKick(bool skip) = 0;
+	virtual void InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) {} // called by Move() for its destination rectangle
+	virtual void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) {} // called by Read() and Move() before local memory is read
+	// image transfers selected by TRXDIR.XDIR in Transfer(): 0 = Write, 1 = Read, 2 = Move
+	void Move();
+	void Write(BYTE* mem, int len);
+	void Read(BYTE* mem, int len);
+	// NOTE(review): presumably the entry points called by the plugin interface - confirm against the exported GS functions
+	void WriteCSR(UINT32 csr);
+	void ReadFIFO(BYTE* mem, int size);
+	void Transfer(BYTE* mem, int size, int index);
+	void GetLastTag(UINT32* tag) {*tag = m_path3hack; m_path3hack = 0;} // returns the m_path3hack flag and clears it
+	int Freeze(freezeData* fd, bool sizeonly);
+	int Defrost(const freezeData* fd);
+};
+
diff --git a/gsdx/GSTables.cpp b/gsdx/GSTables.cpp
new file mode 100644
index 0000000..a73cbec
--- /dev/null
+++ b/gsdx/GSTables.cpp
@@ -0,0 +1,236 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSTables.h"
+
+const DWORD blockTable32[4][8] = { // permutation of 0..31; presumably block index by [y][x] block position inside a 32-bit page - confirm in GSLocalMemory
+	{  0,  1,  4,  5, 16, 17, 20, 21},
+	{  2,  3,  6,  7, 18, 19, 22, 23},
+	{  8,  9, 12, 13, 24, 25, 28, 29},
+	{ 10, 11, 14, 15, 26, 27, 30, 31}
+};
+
+const DWORD blockTable32Z[4][8] = { // every entry equals the blockTable32 entry at the same position XOR 24
+	{ 24, 25, 28, 29,  8,  9, 12, 13},
+	{ 26, 27, 30, 31, 10, 11, 14, 15},
+	{ 16, 17, 20, 21,  0,  1,  4,  5},
+	{ 18, 19, 22, 23,  2,  3,  6,  7}
+};
+
+const DWORD blockTable16[8][4] = { // permutation of 0..31; presumably block index by [y][x] block position inside a 16-bit page - confirm in GSLocalMemory
+	{  0,  2,  8, 10 },
+	{  1,  3,  9, 11 },
+	{  4,  6, 12, 14 },
+	{  5,  7, 13, 15 },
+	{ 16, 18, 24, 26 },
+	{ 17, 19, 25, 27 },
+	{ 20, 22, 28, 30 },
+	{ 21, 23, 29, 31 }
+};
+
+const DWORD blockTable16S[8][4] = { // permutation of 0..31; presumably the block layout of the "16S" pixel formats - confirm in GSLocalMemory
+	{  0,  2, 16, 18 },
+	{  1,  3, 17, 19 },
+	{  8, 10, 24, 26 },
+	{  9, 11, 25, 27 },
+	{  4,  6, 20, 22 },
+	{  5,  7, 21, 23 },
+	{ 12, 14, 28, 30 },
+	{ 13, 15, 29, 31 }
+};
+
+const DWORD blockTable16Z[8][4] = { // every entry equals the blockTable16 entry at the same position XOR 24
+	{ 24, 26, 16, 18 },
+	{ 25, 27, 17, 19 },
+	{ 28, 30, 20, 22 },
+	{ 29, 31, 21, 23 },
+	{  8, 10,  0,  2 },
+	{  9, 11,  1,  3 },
+	{ 12, 14,  4,  6 },
+	{ 13, 15,  5,  7 }
+};
+
+const DWORD blockTable16SZ[8][4] = { // every entry equals the blockTable16S entry at the same position XOR 24
+	{ 24, 26,  8, 10 },
+	{ 25, 27,  9, 11 },
+	{ 16, 18,  0,  2 },
+	{ 17, 19,  1,  3 },
+	{ 28, 30, 12, 14 },
+	{ 29, 31, 13, 15 },
+	{ 20, 22,  4,  6 },
+	{ 21, 23,  5,  7 }
+};
+
+const DWORD blockTable8[4][8] = { // same contents as blockTable32
+	{  0,  1,  4,  5, 16, 17, 20, 21},
+	{  2,  3,  6,  7, 18, 19, 22, 23},
+	{  8,  9, 12, 13, 24, 25, 28, 29},
+	{ 10, 11, 14, 15, 26, 27, 30, 31}
+};
+
+const DWORD blockTable4[8][4] = { // same contents as blockTable16
+	{  0,  2,  8, 10 },
+	{  1,  3,  9, 11 },
+	{  4,  6, 12, 14 },
+	{  5,  7, 13, 15 },
+	{ 16, 18, 24, 26 },
+	{ 17, 19, 25, 27 },
+	{ 20, 22, 28, 30 },
+	{ 21, 23, 29, 31 }
+};
+
+const DWORD columnTable32[8][8] = { // permutation of 0..63; presumably the offset of pixel [y][x] inside one 8x8 block of 32-bit pixels - confirm in GSLocalMemory
+	{  0,  1,  4,  5,  8,  9, 12, 13 },
+	{  2,  3,  6,  7, 10, 11, 14, 15 },
+	{ 16, 17, 20, 21, 24, 25, 28, 29 },
+	{ 18, 19, 22, 23, 26, 27, 30, 31 },
+	{ 32, 33, 36, 37, 40, 41, 44, 45 },
+	{ 34, 35, 38, 39, 42, 43, 46, 47 },
+	{ 48, 49, 52, 53, 56, 57, 60, 61 },
+	{ 50, 51, 54, 55, 58, 59, 62, 63 },
+};
+
+const DWORD columnTable16[8][16] = { // permutation of 0..127; presumably the offset of pixel [y][x] inside one 16x8 block of 16-bit pixels - confirm in GSLocalMemory
+	{   0,   2,   8,  10,  16,  18,  24,  26, 
+	    1,   3,   9,  11,  17,  19,  25,  27 },
+	{   4,   6,  12,  14,  20,  22,  28,  30, 
+	    5,   7,  13,  15,  21,  23,  29,  31 },
+	{  32,  34,  40,  42,  48,  50,  56,  58,
+	   33,  35,  41,  43,  49,  51,  57,  59 },
+	{  36,  38,  44,  46,  52,  54,  60,  62,
+	   37,  39,  45,  47,  53,  55,  61,  63 },
+	{  64,  66,  72,  74,  80,  82,  88,  90,
+	   65,  67,  73,  75,  81,  83,  89,  91 },
+	{  68,  70,  76,  78,  84,  86,  92,  94,
+	   69,  71,  77,  79,  85,  87,  93,  95 },
+	{  96,  98, 104, 106, 112, 114, 120, 122,
+	   97,  99, 105, 107, 113, 115, 121, 123 },
+	{ 100, 102, 108, 110, 116, 118, 124, 126,
+	  101, 103, 109, 111, 117, 119, 125, 127 },
+};
+
+const DWORD columnTable8[16][16] = { // 16 rows in four groups ("column 0".."column 3"); presumably the offset of pixel [y][x] inside one 16x16 block of 8-bit pixels - confirm
+	{   0,   4,  16,  20,  32,  36,  48,  52,	// column 0
+	    2,   6,  18,  22,  34,  38,  50,  54 },
+	{   8,  12,  24,  28,  40,  44,  56,  60,
+	   10,  14,  26,  30,  42,  46,  58,  62 },
+	{  33,  37,  49,  53,   1,   5,  17,  21,
+	   35,  39,  51,  55,   3,   7,  19,  23 },
+	{  41,  45,  57,  61,   9,  13,  25,  29,
+	   43,  47,  59,  63,  11,  15,  27,  31 },
+	{  96, 100, 112, 116,  64,  68,  80,  84, 	// column 1
+	   98, 102, 114, 118,  66,  70,  82,  86 },
+	{ 104, 108, 120, 124,  72,  76,  88,  92, 
+	  106, 110, 122, 126,  74,  78,  90,  94 },
+	{  65,  69,  81,  85,  97, 101, 113, 117,
+	   67,  71,  83,  87,  99, 103, 115, 119 },
+	{  73,  77,  89,  93, 105, 109, 121, 125,
+	   75,  79,  91,  95, 107, 111, 123, 127 },
+	{ 128, 132, 144, 148, 160, 164, 176, 180,	// column 2
+	  130, 134, 146, 150, 162, 166, 178, 182 },
+	{ 136, 140, 152, 156, 168, 172, 184, 188,
+	  138, 142, 154, 158, 170, 174, 186, 190 },
+	{ 161, 165, 177, 181, 129, 133, 145, 149,
+	  163, 167, 179, 183, 131, 135, 147, 151 },
+	{ 169, 173, 185, 189, 137, 141, 153, 157,
+	  171, 175, 187, 191, 139, 143, 155, 159 },
+	{ 224, 228, 240, 244, 192, 196, 208, 212,	// column 3
+	  226, 230, 242, 246, 194, 198, 210, 214 },
+	{ 232, 236, 248, 252, 200, 204, 216, 220,
+	  234, 238, 250, 254, 202, 206, 218, 222 },
+	{ 193, 197, 209, 213, 225, 229, 241, 245,
+	  195, 199, 211, 215, 227, 231, 243, 247 },
+	{ 201, 205, 217, 221, 233, 237, 249, 253,
+	  203, 207, 219, 223, 235, 239, 251, 255 },
+};
+
+const DWORD columnTable4[16][32] = { // 16 rows in four groups ("column 0".."column 3"); presumably the offset of pixel [y][x] inside one 32x16 block of 4-bit pixels - confirm
+	{   0,   8,  32,  40,  64,  72,  96, 104,	// column 0
+	    2,  10,  34,  42,  66,  74,  98, 106,
+	    4,  12,  36,  44,  68,  76, 100, 108,
+	    6,  14,  38,  46,  70,  78, 102, 110 },
+	{  16,  24,  48,  56,  80,  88, 112, 120,
+	   18,  26,  50,  58,  82,  90, 114, 122,
+	   20,  28,  52,  60,  84,  92, 116, 124,
+	   22,  30,  54,  62,  86,  94, 118, 126 },
+	{  65,  73,  97, 105,   1,   9,  33,  41,
+	   67,  75,  99, 107,   3,  11,  35,  43,
+	   69,  77, 101, 109,   5,  13,  37,  45, 
+	   71,  79, 103, 111,   7,  15,  39,  47 },
+	{  81,  89, 113, 121,  17,  25,  49,  57,
+	   83,  91, 115, 123,  19,  27,  51,  59,
+	   85,  93, 117, 125,  21,  29,  53,  61,
+	   87,  95, 119, 127,  23,  31,  55,  63 },
+	{ 192, 200, 224, 232, 128, 136, 160, 168,	// column 1
+	  194, 202, 226, 234, 130, 138, 162, 170,
+	  196, 204, 228, 236, 132, 140, 164, 172,
+	  198, 206, 230, 238, 134, 142, 166, 174 },
+	{ 208, 216, 240, 248, 144, 152, 176, 184,
+	  210, 218, 242, 250, 146, 154, 178, 186,
+	  212, 220, 244, 252, 148, 156, 180, 188,
+	  214, 222, 246, 254, 150, 158, 182, 190 },
+	{ 129, 137, 161, 169, 193, 201, 225, 233,
+	  131, 139, 163, 171, 195, 203, 227, 235,
+	  133, 141, 165, 173, 197, 205, 229, 237, 
+	  135, 143, 167, 175, 199, 207, 231, 239 },
+	{ 145, 153, 177, 185, 209, 217, 241, 249,
+	  147, 155, 179, 187, 211, 219, 243, 251,
+	  149, 157, 181, 189, 213, 221, 245, 253,
+	  151, 159, 183, 191, 215, 223, 247, 255 },
+	{ 256, 264, 288, 296, 320, 328, 352, 360,	// column 2
+	  258, 266, 290, 298, 322, 330, 354, 362,
+	  260, 268, 292, 300, 324, 332, 356, 364,
+	  262, 270, 294, 302, 326, 334, 358, 366 },
+	{ 272, 280, 304, 312, 336, 344, 368, 376,
+	  274, 282, 306, 314, 338, 346, 370, 378,
+	  276, 284, 308, 316, 340, 348, 372, 380,
+	  278, 286, 310, 318, 342, 350, 374, 382 },
+	{ 321, 329, 353, 361, 257, 265, 289, 297,
+	  323, 331, 355, 363, 259, 267, 291, 299,
+	  325, 333, 357, 365, 261, 269, 293, 301, 
+	  327, 335, 359, 367, 263, 271, 295, 303 },
+	{ 337, 345, 369, 377, 273, 281, 305, 313,
+	  339, 347, 371, 379, 275, 283, 307, 315,
+	  341, 349, 373, 381, 277, 285, 309, 317,
+	  343, 351, 375, 383, 279, 287, 311, 319 },
+	{ 448, 456, 480, 488, 384, 392, 416, 424,	// column 3
+	  450, 458, 482, 490, 386, 394, 418, 426,
+	  452, 460, 484, 492, 388, 396, 420, 428,
+	  454, 462, 486, 494, 390, 398, 422, 430 },
+	{ 464, 472, 496, 504, 400, 408, 432, 440,
+	  466, 474, 498, 506, 402, 410, 434, 442,
+	  468, 476, 500, 508, 404, 412, 436, 444,
+	  470, 478, 502, 510, 406, 414, 438, 446 },
+	{ 385, 393, 417, 425, 449, 457, 481, 489,
+	  387, 395, 419, 427, 451, 459, 483, 491,
+	  389, 397, 421, 429, 453, 461, 485, 493, 
+	  391, 399, 423, 431, 455, 463, 487, 495 },
+	{ 401, 409, 433, 441, 465, 473, 497, 505,
+	  403, 411, 435, 443, 467, 475, 499, 507,
+	  405, 413, 437, 445, 469, 477, 501, 509,
+	  407, 415, 439, 447, 471, 479, 503, 511 },
+};
+
+const int primVertexCount[8] = // presumably the number of vertices per primitive, indexed by the 3-bit primitive type of PRIM - confirm
+{
+	1, 2, 2, 3, 3, 3, 2, 1
+};
\ No newline at end of file
diff --git a/gsdx/GSTables.h b/gsdx/GSTables.h
new file mode 100644
index 0000000..45f323e
--- /dev/null
+++ b/gsdx/GSTables.h
@@ -0,0 +1,36 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+extern const DWORD blockTable32[4][8];
+extern const DWORD blockTable32Z[4][8];
+extern const DWORD blockTable16[8][4];
+extern const DWORD blockTable16S[8][4];
+extern const DWORD blockTable16Z[8][4];
+extern const DWORD blockTable16SZ[8][4];
+extern const DWORD blockTable8[4][8];
+extern const DWORD blockTable4[8][4];
+extern const DWORD columnTable32[8][8];
+extern const DWORD columnTable16[8][16];
+extern const DWORD columnTable8[16][16];
+extern const DWORD columnTable4[16][32];
+extern const int primVertexCount[8];
diff --git a/gsdx/GSUtil.cpp b/gsdx/GSUtil.cpp
new file mode 100644
index 0000000..90c0017
--- /dev/null
+++ b/gsdx/GSUtil.cpp
@@ -0,0 +1,95 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GS.h"
+
+bool HasSharedBits(DWORD spsm, DWORD dpsm) // false only for the format pairs tested below; true for everything else
+{
+	// NOTE(review): presumably "shared bits" means that the two pixel formats occupy overlapping bits
+	// of the same 32-bit word (24-bit colour/Z vs. the 8H/4HL/4HH formats) - confirm against GSLocalMemory.
+	const bool dst24 = dpsm == PSM_PSMCT24 || dpsm == PSM_PSMZ24;
+
+	if(spsm == PSM_PSMCT24 || spsm == PSM_PSMZ24)
+	{
+		// 24-bit source: shares nothing with any of the three "H" formats
+		return dpsm != PSM_PSMT8H && dpsm != PSM_PSMT4HL && dpsm != PSM_PSMT4HH;
+	}
+
+	if(spsm == PSM_PSMT8H)
+	{
+		return !dst24;
+	}
+
+	if(spsm == PSM_PSMT4HL)
+	{
+		return !dst24 && dpsm != PSM_PSMT4HH;
+	}
+
+	if(spsm == PSM_PSMT4HH) return !dst24 && dpsm != PSM_PSMT4HL;
+
+	return true; // every other source format (CT32, CT16, CT16S, T8, T4, Z32, Z16, Z16S and unknown values)
+}
+
+bool HasSharedBits(DWORD sbp, DWORD spsm, DWORD dbp, DWORD dpsm) // like the two-argument overload, but only for equal base pointers
+{
+	// buffers with different base pointers never count as sharing, whatever their formats are
+	const bool same_base = sbp == dbp;
+	return same_base && HasSharedBits(spsm, dpsm);
+}
+
+bool HasCompatibleBits(DWORD spsm, DWORD dpsm) // true for identical formats or for two formats of the same pair listed below
+{
+	if(spsm == dpsm) return true;
+
+	// Compatible pairs: {CT32, CT24}, {CT16, CT16S}, {Z32, Z24}, {Z16, Z16S}.
+	// A source format outside these pairs is compatible only with itself (handled above).
+	if(spsm == PSM_PSMCT32 || spsm == PSM_PSMCT24)
+		return dpsm == PSM_PSMCT32 || dpsm == PSM_PSMCT24;
+
+	if(spsm == PSM_PSMCT16 || spsm == PSM_PSMCT16S)
+		return dpsm == PSM_PSMCT16 || dpsm == PSM_PSMCT16S;
+
+	if(spsm == PSM_PSMZ32 || spsm == PSM_PSMZ24)
+		return dpsm == PSM_PSMZ32 || dpsm == PSM_PSMZ24;
+
+	if(spsm == PSM_PSMZ16 || spsm == PSM_PSMZ16S)
+		return dpsm == PSM_PSMZ16 || dpsm == PSM_PSMZ16S;
+
+	// no pair matched
+
+	return false;
+}
+
+bool IsRectInRect(const CRect& inner, const CRect& outer) // true when inner lies entirely inside outer (edges may coincide)
+{
+	return inner.left >= outer.left && outer.right >= inner.right && inner.top >= outer.top && outer.bottom >= inner.bottom;
+}
+
+bool IsRectInRectH(const CRect& inner, const CRect& outer) // true when inner's top..bottom range fits inside outer's
+{
+	return inner.top >= outer.top && outer.bottom >= inner.bottom; // NOTE(review): despite the "H" suffix only the vertical extent is tested
+}
+
+bool IsRectInRectV(const CRect& inner, const CRect& outer) // true when inner's left..right range fits inside outer's
+{
+	return inner.left >= outer.left && outer.right >= inner.right; // NOTE(review): despite the "V" suffix only the horizontal extent is tested
+}
diff --git a/gsdx/GSUtil.h b/gsdx/GSUtil.h
new file mode 100644
index 0000000..70bb983
--- /dev/null
+++ b/gsdx/GSUtil.h
@@ -0,0 +1,29 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+extern bool HasSharedBits(DWORD spsm, DWORD dpsm);
+extern bool HasSharedBits(DWORD sbp, DWORD spsm, DWORD dbp, DWORD dpsm);
+extern bool HasCompatibleBits(DWORD spsm, DWORD dpsm);
+extern bool IsRectInRect(const CRect& inner, const CRect& outer);
+extern bool IsRectInRectH(const CRect& inner, const CRect& outer);
+extern bool IsRectInRectV(const CRect& inner, const CRect& outer);
diff --git a/gsdx/GSVertex.h b/gsdx/GSVertex.h
new file mode 100644
index 0000000..36c33c9
--- /dev/null
+++ b/gsdx/GSVertex.h
@@ -0,0 +1,39 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GS.h"
+
+#pragma pack(push, 1)
+
+struct GSVertex // one value of each vertex attribute register; declared under #pragma pack(push, 1), so members are byte-packed
+{
+	GIFRegRGBAQ		RGBAQ;
+	GIFRegST		ST;
+	GIFRegUV		UV;
+	GIFRegXYZ		XYZ;
+	GIFRegFOG		FOG;
+	// zero-fills the whole object; NOTE(review): this assumes every member type is plain data - confirm in GS.h
+	GSVertex() {memset(this, 0, sizeof(*this));}
+};
+
+#pragma pack(pop)
diff --git a/gsdx/GSVertexList.cpp b/gsdx/GSVertexList.cpp
new file mode 100644
index 0000000..d39b008
--- /dev/null
+++ b/gsdx/GSVertexList.cpp
@@ -0,0 +1,23 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSVertexList.h"
\ No newline at end of file
diff --git a/gsdx/GSVertexList.h b/gsdx/GSVertexList.h
new file mode 100644
index 0000000..12abd38
--- /dev/null
+++ b/gsdx/GSVertexList.h
@@ -0,0 +1,89 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with this Program; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+template <class Vertex> class GSVertexList // Fixed-capacity (4 slot) ring buffer of Vertex; slot indices wrap with "& 3".
+{
+	Vertex* m_v; // 16-byte aligned raw storage for 4 Vertex slots (allocated with _aligned_malloc, no constructors are run)
+	int m_head; // slot index of the oldest stored element
+	int m_tail; // slot index the next AddTail will write to
+	int m_count; // number of elements currently stored (0..4)
+
+public:
+	GSVertexList()
+	{
+		m_v = (Vertex*)_aligned_malloc(sizeof(Vertex)*4, 16); // NOTE(review): the result is not checked for NULL
+
+		RemoveAll();
+	}
+
+	virtual ~GSVertexList() // NOTE(review): no copy constructor/assignment is declared, so a copied instance would free m_v twice
+	{
+		_aligned_free(m_v);
+	}
+
+	void RemoveAll() // Empties the list by resetting the indices; the slots themselves are left untouched.
+	{
+		m_head = m_tail = m_count = 0;
+	}
+
+	Vertex& AddTail() // Reserves the next slot and returns a reference to it for the caller to fill in.
+	{
+		ASSERT(m_count < 4);
+
+		Vertex& v = m_v[m_tail];
+		m_tail = (m_tail+1)&3;
+		m_count++;
+		return v;
+	}
+
+	void AddTail(Vertex& v) // Appends a copy of v at the tail.
+	{
+		ASSERT(m_count < 4);
+
+		m_v[m_tail] = v;
+		m_tail = (m_tail+1)&3;
+		m_count++;
+	}
+
+	void RemoveAt(int i, Vertex& v) // Copies the i-th element (0 = oldest) into v, then removes it from the list.
+	{
+		GetAt(i, v);
+
+		i = (m_head+i)&3;
+		if(i == m_head) m_head = (m_head+1)&3; // removing the oldest element: just advance the head
+		else for(m_tail = (m_tail+4-1)&3; i != m_tail; i = (i+1)&3) m_v[i] = m_v[(i+1)&3]; // otherwise step the tail back and shift the later elements down by one slot
+		m_count--;
+	}
+
+	void GetAt(int i, Vertex& v) // Copies the i-th element (0 = oldest) into v without removing it.
+	{
+		ASSERT(i >= 0 && i < m_count); // i must address a stored element; the "& 3" below would otherwise wrap silently onto a stale slot
+
+		v = m_v[(m_head+i)&3];
+	}
+
+	int GetCount() // Returns the number of elements currently stored.
+	{
+		return m_count;
+	}
+};
diff --git a/gsdx/GSdx_vs2005.vcproj b/gsdx/GSdx_vs2005.vcproj
new file mode 100644
index 0000000..ac5742e
--- /dev/null
+++ b/gsdx/GSdx_vs2005.vcproj
@@ -0,0 +1,788 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="GSdx"
+	ProjectGUID="{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
+	RootNamespace="GSdx"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="196613"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\debug.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|x64"
+			ConfigurationType="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\release.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|x64"
+			ConfigurationType="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\GSDirtyRect.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSLocalMemory.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSPerfMon.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSState.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTables.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSUtil.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexList.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.cpp"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\x86-32.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\x86-64.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\x86.cpp"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath=".\GS.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDirtyRect.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDrawingContext.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDrawingEnvironment.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSLocalMemory.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSPerfMon.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSScale.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSState.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTables.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSUtil.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertex.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexList.h"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.h"
+				>
+			</File>
+			<File
+				RelativePath=".\x86.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/gsdx/GSdx_vs2008.vcproj b/gsdx/GSdx_vs2008.vcproj
new file mode 100644
index 0000000..d4f692a
--- /dev/null
+++ b/gsdx/GSdx_vs2008.vcproj
@@ -0,0 +1,788 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="GSdx"
+	ProjectGUID="{18E42F6F-3A62-41EE-B42F-79366C4F1E95}"
+	RootNamespace="GSdx"
+	Keyword="Win32Proj"
+	TargetFrameworkVersion="196613"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\debug.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\release.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\debug.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|x64"
+			ConfigurationType="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|Win32"
+			ConfigurationType="4"
+			InheritedPropertySheets="..\common.vsprops;..\release.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_LIB"
+				UsePrecompiledHeader="2"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLibrarianTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|x64"
+			ConfigurationType="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\GSDirtyRect.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSLocalMemory.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSPerfMon.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSState.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTables.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSUtil.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexList.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.cpp"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\x86-32.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|x64"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\x86-64.asm"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					ExcludedFromBuild="true"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+						CommandLine="ml64 /nologo /c /Zi /Fo&quot;$(IntDir)\$(InputName).obj&quot; &quot;$(InputPath)&quot;"
+						Outputs="$(IntDir)\$(InputName).obj"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\x86.cpp"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath=".\GS.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDirtyRect.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDrawingContext.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDrawingEnvironment.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSLocalMemory.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSPerfMon.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSScale.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSState.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTables.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSUtil.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertex.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexList.h"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.h"
+				>
+			</File>
+			<File
+				RelativePath=".\x86.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+		</Filter>
+	</Files>
+	<Globals>
+	</Globals>
+</VisualStudioProject>
diff --git a/gsdx/stdafx.cpp b/gsdx/stdafx.cpp
new file mode 100644
index 0000000..3832007
--- /dev/null
+++ b/gsdx/stdafx.cpp
@@ -0,0 +1,8 @@
+// stdafx.cpp : source file that includes just the standard includes
+// GSdx.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+// TODO: reference any additional headers you need in STDAFX.H
+// and not in this file
diff --git a/gsdx/stdafx.h b/gsdx/stdafx.h
new file mode 100644
index 0000000..899908a
--- /dev/null
+++ b/gsdx/stdafx.h
@@ -0,0 +1,54 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+//
+
+#pragma once
+
+#pragma warning(disable: 4996)	// silence MSVC warning C4996 for every file that includes this header
+
+#ifndef VC_EXTRALEAN
+#define VC_EXTRALEAN		// Exclude rarely-used stuff from Windows headers
+#endif
+
+// Each version macro below is only defined when the build has not already supplied one.
+// Refer to MSDN for the latest info on corresponding values for different platforms.
+#ifndef WINVER				// Minimum Windows version assumed by the Windows headers.
+#define WINVER 0x0510		// NOTE(review): 0x0510 is not a documented WINVER value (Windows XP is 0x0501) and is higher than _WIN32_WINNT below - confirm the intended target.
+#endif
+
+#ifndef _WIN32_WINNT		// Allow use of features specific to Windows NT 4 or later.
+#define _WIN32_WINNT 0x0400	// Change this to the appropriate value to target Windows 2000 or later.
+#endif						
+
+#ifndef _WIN32_WINDOWS		// Allow use of features specific to Windows 98 or later.
+#define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later.
+#endif
+
+#ifndef _WIN32_IE			// Allow use of features specific to IE 4.0 or later.
+#define _WIN32_IE 0x0400	// Change this to the appropriate value to target IE 5.0 or later.
+#endif
+
+#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS	// some CString constructors will be explicit
+
+#include <afxwin.h>         // MFC core and standard components
+//#include <afxext.h>         // MFC extensions
+//#include <afxmt.h>
+#include <atlbase.h>
+#include <atlcoll.h>
+#include <atlpath.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#define countof(a) (sizeof(a) / sizeof(a[0]))	// element count of an array (meaningless for a plain pointer)
+
+#if !defined(RESTRICT)	// per-compiler spelling of the restrict qualifier; an existing definition is kept
+	#if defined(__INTEL_COMPILER)
+		#define RESTRICT restrict
+	#elif _MSC_VER >= 1400
+		#define RESTRICT __restrict
+	#else
+		#define RESTRICT
+	#endif
+#endif
+
diff --git a/gsdx/x86-32.asm b/gsdx/x86-32.asm
new file mode 100644
index 0000000..4e945c4
--- /dev/null
+++ b/gsdx/x86-32.asm
@@ -0,0 +1,1335 @@
+
+	.686					; P6 instruction set (the cmovl/cmovg used below need it)
+	.model flat
+	.mmx
+	.xmm					; enable the SSE/SSE2 mnemonics used by the *_sse2 routines
+
+	.const
+
+	__uvmin DD 0d01502f9r ; -1e+010 (single-precision bit pattern in MASM hex-real notation)
+	__uvmax DD 0501502f9r ; +1e+010 (single-precision bit pattern in MASM hex-real notation)
+
+	.code
+	
+;
+; memsetd
+;
+
+@memsetd@12 proc public
+	; Stores the dword in edx to [esp+4] consecutive dwords starting at ecx.
+	push	edi						; preserved for the caller, restored below
+	cld								; direction flag clear: stosd advances edi upwards
+	mov		eax, edx				; stosd stores eax
+	mov		edi, ecx				; stosd stores to [edi]
+	mov		ecx, [esp+8]			; dword count: the stack argument, above the saved edi and the return address
+	rep		stosd
+
+	pop		edi
+
+	ret		4						; also releases the 4-byte stack argument
+
+
+@memsetd@12 endp
+
+;
+; SaturateColor
+;
+			
+@SaturateColor_sse2@4 proc public
+	; Clamps the four signed dwords at [ecx] to 0..255 in place; movdqa requires [ecx] to be 16-byte aligned.
+	pxor		xmm0, xmm0			; xmm0 = 0, used as packing filler and for zero extension
+	movdqa		xmm1, [ecx]
+	packssdw	xmm1, xmm0			; dwords -> words, signed saturation
+	packuswb	xmm1, xmm0			; words -> bytes, unsigned saturation (this is the 0..255 clamp)
+	punpcklbw	xmm1, xmm0			; bytes -> words, zero extended
+	punpcklwd	xmm1, xmm0			; words -> dwords, zero extended
+	movdqa		[ecx], xmm1
+
+	ret
+
+@SaturateColor_sse2@4 endp
+
+@SaturateColor_asm@4 proc public
+
+	; Scalar (cmov based) variant: clamps each of the four signed dwords
+	; stored at [ecx] to the range 0..255, in place.
+	;
+	; in:  ecx = pointer to four consecutive dwords
+	; out: the four dwords are overwritten with their clamped values
+	;
+	; Register roles inside the routine:
+	;   esi = pointer (pushed on entry and popped before returning)
+	;   eax = lower bound, 0
+	;   edx = upper bound, 255
+	;   ecx = dword currently being clamped
+
+	push	esi
+	mov		esi, ecx
+
+	xor		eax, eax
+	mov		edx, 000000ffh
+
+	; The clamp sequence is the same for every component, so it is
+	; generated once per byte offset instead of being written out by hand.
+	; Both compares are signed: negative inputs become 0, inputs above
+	; 255 become 255.
+
+	for		offs, <0, 4, 8, 12>
+
+	mov		ecx, [esi+offs]
+	cmp		ecx, eax
+	cmovl	ecx, eax
+	cmp		ecx, edx
+	cmovg	ecx, edx
+	mov		[esi+offs], ecx
+
+	endm
+
+	pop		esi
+
+	ret
+
+
+@SaturateColor_asm@4 endp
+
+;
+; swizzling
+;
+
+punpck macro op, sd0, sd2, s1, s3, d1, d3
+	;; Arguments are xmm register numbers; op is the punpckl/punpckh suffix (bw, wd, dq, qdq).
+	movdqa					@CatStr(xmm, %d1),	@CatStr(xmm, %sd0)
+	pshufd					@CatStr(xmm, %d3),	@CatStr(xmm, %sd2), 0e4h
+	;; (pshufd with 0e4h is a plain copy.) Results: sd0 = low(sd0,s1), d1 = high(sd0,s1), sd2 = low(sd2,s3), d3 = high(sd2,s3).
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd0),	@CatStr(xmm, %s1)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d1),	@CatStr(xmm, %s1)
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd2),	@CatStr(xmm, %s3)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d3),	@CatStr(xmm, %s3)
+
+	endm
+	
+punpcknb macro
+	;; Nibble interleave of xmm0 with xmm1 and of xmm2 with xmm3, followed by a byte interleave.
+	movdqa	xmm4, xmm0
+	pshufd	xmm5, xmm1, 0e4h
+	;; xmm7 is read as the nibble mask; the callers in this file load 0f0f0f0fh into every dword of it.
+	psllq	xmm1, 4
+	psrlq	xmm4, 4
+	;; xmm0 = low nibbles of xmm0, with the low nibbles of xmm1 placed in the high nibble of each byte
+	movdqa	xmm6, xmm7
+	pand	xmm0, xmm7
+	pandn	xmm6, xmm1
+	por		xmm0, xmm6
+	;; xmm4 = high nibbles of xmm0 moved to the low nibble, high nibbles of xmm1 kept in place
+	movdqa	xmm6, xmm7
+	pand	xmm4, xmm7
+	pandn	xmm6, xmm5
+	por		xmm4, xmm6
+
+	movdqa	xmm1, xmm4
+	;; same two steps for the xmm2 / xmm3 pair
+	movdqa	xmm4, xmm2
+	pshufd	xmm5, xmm3, 0e4h
+
+	psllq	xmm3, 4
+	psrlq	xmm4, 4
+
+	movdqa	xmm6, xmm7
+	pand	xmm2, xmm7
+	pandn	xmm6, xmm3
+	por		xmm2, xmm6
+
+	movdqa	xmm6, xmm7
+	pand	xmm4, xmm7
+	pandn	xmm6, xmm5
+	por		xmm4, xmm6
+
+	movdqa	xmm3, xmm4
+	;; results end up in xmm0, xmm2, xmm4, xmm6; xmm1, xmm3 and xmm5 are overwritten along the way
+	punpck	bw, 0, 2, 1, 3, 4, 6
+
+	endm
+
+;
+; unSwizzleBlock32
+;
+
+@unSwizzleBlock32_sse2@12 proc public
+	; ecx = source (256 bytes, read with movdqa), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+	; Writes 8 rows of 32 bytes; every access is movdqa, so source, destination and pitch must keep 16-byte alignment.
+	mov			ebx, [esp+4+4]		; pitch (the push above moved the argument 4 bytes further up)
+	lea			eax, [ebx*2]
+	add			eax, ebx			; eax = pitch * 3
+
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm2, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; rows 0 and 1
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+ebx], xmm4
+	movdqa		[edx+ebx+16], xmm6
+
+	movdqa		xmm0, [ecx+16*4]
+	movdqa		xmm1, [ecx+16*5]
+	movdqa		xmm2, [ecx+16*6]
+	movdqa		xmm3, [ecx+16*7]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; rows 2 and 3
+	movdqa		[edx+ebx*2], xmm0
+	movdqa		[edx+ebx*2+16], xmm2
+	movdqa		[edx+eax], xmm4
+	movdqa		[edx+eax+16], xmm6
+	
+	lea			edx, [edx+ebx*4]	; advance to row 4
+
+	movdqa		xmm0, [ecx+16*8]
+	movdqa		xmm1, [ecx+16*9]
+	movdqa		xmm2, [ecx+16*10]
+	movdqa		xmm3, [ecx+16*11]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; rows 4 and 5
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+ebx], xmm4
+	movdqa		[edx+ebx+16], xmm6
+
+	movdqa		xmm0, [ecx+16*12]
+	movdqa		xmm1, [ecx+16*13]
+	movdqa		xmm2, [ecx+16*14]
+	movdqa		xmm3, [ecx+16*15]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; rows 6 and 7
+	movdqa		[edx+ebx*2], xmm0
+	movdqa		[edx+ebx*2+16], xmm2
+	movdqa		[edx+eax], xmm4
+	movdqa		[edx+eax+16], xmm6
+
+	pop			ebx
+
+	ret			4					; releases the stack argument
+
+@unSwizzleBlock32_sse2@12 endp
+
+;
+; unSwizzleBlock16
+;
+
+@unSwizzleBlock16_sse2@12 proc public
+	; ecx = source (4 x 64 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 4				; 4 iterations, each producing two 32-byte rows
+	
+	align 16
+@@:
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm2, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+	; three interleave passes (word, dword, word) rearrange the 64 bytes into two rows
+	punpck		wd, 0, 2, 1, 3, 4, 6
+	punpck		dq, 0, 4, 2, 6, 1, 3
+	punpck		wd, 0, 4, 1, 3, 2, 6
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+ebx], xmm4
+	movdqa		[edx+ebx+16], xmm6
+
+	add			ecx, 64
+	lea			edx, [edx+ebx*2]
+
+	dec			eax
+	jnz			@B
+	
+	pop			ebx
+	
+	ret			4
+	
+@unSwizzleBlock16_sse2@12 endp
+
+;
+; unSwizzleBlock8
+;
+
+@unSwizzleBlock8_sse2@12 proc public
+	; ecx = source (2 x 128 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 2				; 2 iterations, each producing eight 16-byte rows
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm4, [ecx+16*2]
+	movdqa		xmm5, [ecx+16*3]
+
+	punpck		bw,  0, 4, 1, 5, 2, 6
+	punpck		wd,  0, 2, 4, 6, 1, 3
+	punpck		bw,  0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 2, 4, 6, 1, 3
+	; 0b1h swaps the two dwords inside each qword; here it is applied to the third and fourth rows
+	pshufd		xmm1, xmm1, 0b1h
+	pshufd		xmm3, xmm3, 0b1h
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+ebx], xmm2
+	lea			edx, [edx+ebx*2]
+
+	movdqa		[edx], xmm1
+	movdqa		[edx+ebx], xmm3
+	lea			edx, [edx+ebx*2]
+
+	; col 1, 3
+
+	movdqa		xmm0, [ecx+16*4]
+	movdqa		xmm1, [ecx+16*5]
+	movdqa		xmm4, [ecx+16*6]
+	movdqa		xmm5, [ecx+16*7]
+
+	punpck		bw,  0, 4, 1, 5, 2, 6
+	punpck		wd,  0, 2, 4, 6, 1, 3
+	punpck		bw,  0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 2, 4, 6, 1, 3
+	; for these columns the dword swap is applied to the first and second rows instead
+	pshufd		xmm0, xmm0, 0b1h
+	pshufd		xmm2, xmm2, 0b1h
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+ebx], xmm2
+	lea			edx, [edx+ebx*2]
+
+	movdqa		[edx], xmm1
+	movdqa		[edx+ebx], xmm3
+	lea			edx, [edx+ebx*2]
+
+	add			ecx, 128
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+	
+	ret			4
+
+@unSwizzleBlock8_sse2@12 endp
+
+;
+; unSwizzleBlock4
+;
+
+@unSwizzleBlock4_sse2@12 proc public
+	; ecx = source (2 x 128 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+	; xmm7 = 0f0f0f0fh in every dword: the low-nibble mask that the punpcknb macro reads
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 2				; 2 iterations, each producing eight 16-byte rows
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm4, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+
+	punpck		dq, 0, 4, 1, 3, 2, 6
+	punpck		dq, 0, 2, 4, 6, 1, 3
+	punpcknb
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		wd, 0, 2, 1, 3, 4, 6
+	; 0d8h swaps the two middle dwords of each register
+	pshufd		xmm0, xmm0, 0d8h
+	pshufd		xmm2, xmm2, 0d8h
+	pshufd		xmm4, xmm4, 0d8h
+	pshufd		xmm6, xmm6, 0d8h
+
+	punpck		qdq, 0, 2, 4, 6, 1, 3
+	; 0b1h on both word halves swaps adjacent words; applied to the third and fourth rows here
+	pshuflw		xmm1, xmm1, 0b1h
+	pshuflw		xmm3, xmm3, 0b1h
+	pshufhw		xmm1, xmm1, 0b1h
+	pshufhw		xmm3, xmm3, 0b1h
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+ebx], xmm2
+	lea			edx, [edx+ebx*2]
+
+	movdqa		[edx], xmm1
+	movdqa		[edx+ebx], xmm3
+	lea			edx, [edx+ebx*2]
+
+	; col 1, 3
+
+	movdqa		xmm0, [ecx+16*4]
+	movdqa		xmm1, [ecx+16*5]
+	movdqa		xmm4, [ecx+16*6]
+	movdqa		xmm3, [ecx+16*7]
+
+	punpck		dq, 0, 4, 1, 3, 2, 6
+	punpck		dq, 0, 2, 4, 6, 1, 3
+	punpcknb
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		wd, 0, 2, 1, 3, 4, 6
+
+	pshufd		xmm0, xmm0, 0d8h
+	pshufd		xmm2, xmm2, 0d8h
+	pshufd		xmm4, xmm4, 0d8h
+	pshufd		xmm6, xmm6, 0d8h
+
+	punpck		qdq, 0, 2, 4, 6, 1, 3
+	; for these columns the adjacent-word swap is applied to the first and second rows instead
+	pshuflw		xmm0, xmm0, 0b1h
+	pshuflw		xmm2, xmm2, 0b1h
+	pshufhw		xmm0, xmm0, 0b1h
+	pshufhw		xmm2, xmm2, 0b1h
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+ebx], xmm2
+	lea			edx, [edx+ebx*2]
+
+	movdqa		[edx], xmm1
+	movdqa		[edx+ebx], xmm3
+	lea			edx, [edx+ebx*2]
+
+	add			ecx, 128
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+	
+	ret			4
+
+@unSwizzleBlock4_sse2@12 endp
+
+;
+; unSwizzleBlock8HP
+;
+
+@unSwizzleBlock8HP_sse2@12 proc public
+	; ecx = source (4 x 64 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 4				; 4 iterations, each producing two 8-byte rows
+
+	align 16
+@@:
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm2, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; keep only bits 24..31 of every dword
+	psrld		xmm0, 24
+	psrld		xmm2, 24
+	psrld		xmm4, 24
+	psrld		xmm6, 24
+	; 16 dwords (each 0..255 after the shift) -> 16 bytes in xmm0
+	packssdw	xmm0, xmm2
+	packssdw	xmm4, xmm6
+	packuswb	xmm0, xmm4
+
+	movlps		qword ptr [edx], xmm0
+	movhps		qword ptr [edx+ebx], xmm0
+
+	add			ecx, 64
+	lea			edx, [edx+ebx*2]
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@unSwizzleBlock8HP_sse2@12 endp
+
+;
+; unSwizzleBlock4HLP
+;
+
+@unSwizzleBlock4HLP_sse2@12 proc public
+	; ecx = source (4 x 64 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+	; xmm7 = 0fh in every byte, used below to keep only the low nibble of each output byte
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 4				; 4 iterations, each producing two 8-byte rows
+	
+	align 16
+@@:
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm2, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; keep only bits 24..31 of every dword
+	psrld		xmm0, 24
+	psrld		xmm2, 24
+	psrld		xmm4, 24
+	psrld		xmm6, 24
+	; 16 dwords -> 16 bytes in xmm0
+	packssdw	xmm0, xmm2
+	packssdw	xmm4, xmm6
+	packuswb	xmm0, xmm4
+
+	pand		xmm0, xmm7			; leaves source bits 24..27 in each output byte
+
+	movlps		qword ptr [edx], xmm0
+	movhps		qword ptr [edx+ebx], xmm0
+
+	add			ecx, 64
+	lea			edx, [edx+ebx*2]
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@unSwizzleBlock4HLP_sse2@12 endp
+
+;
+; unSwizzleBlock4HHP
+;
+
+@unSwizzleBlock4HHP_sse2@12 proc public
+	; ecx = source (4 x 64 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 4				; 4 iterations, each producing two 8-byte rows
+
+	align 16
+@@:
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm2, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6
+	; keep only bits 28..31 of every dword
+	psrld		xmm0, 28
+	psrld		xmm2, 28
+	psrld		xmm4, 28
+	psrld		xmm6, 28
+	; 16 dwords (each 0..15 after the shift) -> 16 bytes in xmm0
+	packssdw	xmm0, xmm2
+	packssdw	xmm4, xmm6
+	packuswb	xmm0, xmm4
+
+	movlps		qword ptr [edx], xmm0
+	movhps		qword ptr [edx+ebx], xmm0
+
+	add			ecx, 64
+	lea			edx, [edx+ebx*2]
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@unSwizzleBlock4HHP_sse2@12 endp
+
+;
+; unSwizzleBlock4P
+;
+
+@unSwizzleBlock4P_sse2@12 proc public
+	; ecx = source (256 bytes), edx = destination, stack argument = destination row pitch in bytes.
+	push		esi
+	push		edi
+	; xmm7 = 0fh in every byte; each source byte is split into its low and high nibble, one nibble per output byte
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0
+
+	mov			esi, [esp+4+8]		; pitch (two pushes precede the argument)
+	lea			edi, [esi*2]
+	add			edi, esi			; edi = pitch * 3
+
+	; col 0
+
+	movdqa		xmm0, [ecx+16*0]
+	movdqa		xmm1, [ecx+16*1]
+	movdqa		xmm2, [ecx+16*2]
+	movdqa		xmm3, [ecx+16*3]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+	; xmm0 keeps the low nibbles; xmm1 gets the high nibbles, dword-swapped and shifted down to bits 0..3
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm7
+	pshufd		xmm1, xmm1, 0b1h
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm7
+	pshufd		xmm3, xmm3, 0b1h
+	psrlq		xmm3, 4
+	; rows 0 and 2 of this column
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+esi*2], xmm1
+	movdqa		[edx+esi*2+16], xmm3
+	
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm4
+	pand		xmm4, xmm7
+	pshufd		xmm1, xmm1, 0b1h
+	psrlq		xmm1, 4
+	
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm6
+	pand		xmm6, xmm7
+	pshufd		xmm3, xmm3, 0b1h
+	psrlq		xmm3, 4
+	; rows 1 and 3 of this column
+	movdqa		[edx+esi], xmm4
+	movdqa		[edx+esi+16], xmm6
+	movdqa		[edx+edi], xmm1
+	movdqa		[edx+edi+16], xmm3
+
+	lea			edx, [edx+esi*4]
+
+	; col 1
+
+	movdqa		xmm0, [ecx+16*4]
+	movdqa		xmm1, [ecx+16*5]
+	movdqa		xmm2, [ecx+16*6]
+	movdqa		xmm3, [ecx+16*7]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+	; unlike col 0, the dword swap here is applied to the low-nibble registers (xmm0, xmm2)
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm7
+	pshufd		xmm0, xmm0, 0b1h
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm7
+	pshufd		xmm2, xmm2, 0b1h
+	psrlq		xmm3, 4
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+esi*2], xmm1
+	movdqa		[edx+esi*2+16], xmm3
+	
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm4
+	pand		xmm4, xmm7
+	pshufd		xmm4, xmm4, 0b1h
+	psrlq		xmm1, 4
+	
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm6
+	pand		xmm6, xmm7
+	pshufd		xmm6, xmm6, 0b1h
+	psrlq		xmm3, 4
+
+	movdqa		[edx+esi], xmm4
+	movdqa		[edx+esi+16], xmm6
+	movdqa		[edx+edi], xmm1
+	movdqa		[edx+edi+16], xmm3
+
+	lea			edx, [edx+esi*4]
+
+	; col 2
+
+	movdqa		xmm0, [ecx+16*8]
+	movdqa		xmm1, [ecx+16*9]
+	movdqa		xmm2, [ecx+16*10]
+	movdqa		xmm3, [ecx+16*11]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+	; same sequence as col 0
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm7
+	pshufd		xmm1, xmm1, 0b1h
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm7
+	pshufd		xmm3, xmm3, 0b1h
+	psrlq		xmm3, 4
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+esi*2], xmm1
+	movdqa		[edx+esi*2+16], xmm3
+	
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm4
+	pand		xmm4, xmm7
+	pshufd		xmm1, xmm1, 0b1h
+	psrlq		xmm1, 4
+	
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm6
+	pand		xmm6, xmm7
+	pshufd		xmm3, xmm3, 0b1h
+	psrlq		xmm3, 4
+
+	movdqa		[edx+esi], xmm4
+	movdqa		[edx+esi+16], xmm6
+	movdqa		[edx+edi], xmm1
+	movdqa		[edx+edi+16], xmm3
+
+	lea			edx, [edx+esi*4]
+
+	; col 3
+
+	movdqa		xmm0, [ecx+16*12]
+	movdqa		xmm1, [ecx+16*13]
+	movdqa		xmm2, [ecx+16*14]
+	movdqa		xmm3, [ecx+16*15]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+	; same sequence as col 1
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm7
+	pshufd		xmm0, xmm0, 0b1h
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm7
+	pshufd		xmm2, xmm2, 0b1h
+	psrlq		xmm3, 4
+
+	movdqa		[edx], xmm0
+	movdqa		[edx+16], xmm2
+	movdqa		[edx+esi*2], xmm1
+	movdqa		[edx+esi*2+16], xmm3
+	
+	movdqa		xmm1, xmm7
+	pandn		xmm1, xmm4
+	pand		xmm4, xmm7
+	pshufd		xmm4, xmm4, 0b1h
+	psrlq		xmm1, 4
+	
+	movdqa		xmm3, xmm7
+	pandn		xmm3, xmm6
+	pand		xmm6, xmm7
+	pshufd		xmm6, xmm6, 0b1h
+	psrlq		xmm3, 4
+
+	movdqa		[edx+esi], xmm4
+	movdqa		[edx+esi+16], xmm6
+	movdqa		[edx+edi], xmm1
+	movdqa		[edx+edi+16], xmm3
+
+	; lea			edx, [edx+esi*4]
+
+	pop			edi
+	pop			esi
+
+	ret			4
+
+@unSwizzleBlock4P_sse2@12 endp
+
+;
+; swizzling
+;
+
+;
+; SwizzleBlock32
+;
+
+@SwizzleBlock32_sse2@16 proc public
+	; ecx = destination (256 bytes), edx = source, stack arguments = source row pitch in bytes, 32-bit write mask.
+	; A mask of 0ffffffffh takes the plain store loop; any other mask takes the read-modify-write loop further down.
+	push		esi
+	push		edi
+
+	mov			edi, ecx
+	mov			esi, edx
+	mov			edx, [esp+4+8]		; pitch
+	mov			ecx, 4				; 4 iterations, each consuming two 32-byte source rows
+
+	mov			eax, [esp+8+8]		; write mask
+	cmp			eax, 0ffffffffh
+	jnz			SwizzleBlock32_sse2@WM
+
+	align 16
+@@:
+	movdqa		xmm0, [esi]
+	movdqa		xmm4, [esi+16]
+	movdqa		xmm1, [esi+edx]
+	movdqa		xmm5, [esi+edx+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6
+
+	movdqa		[edi+16*0], xmm0
+	movdqa		[edi+16*1], xmm2
+	movdqa		[edi+16*2], xmm4
+	movdqa		[edi+16*3], xmm6
+
+	lea			esi, [esi+edx*2]
+	add			edi, 64
+
+	dec			ecx
+	jnz			@B
+
+	pop			edi
+	pop			esi
+
+	ret			8
+
+SwizzleBlock32_sse2@WM:
+	; masked path: xmm7 = the mask replicated into all four dwords
+	movd		xmm7, eax
+	pshufd		xmm7, xmm7, 0
+	
+	align 16
+@@:
+	movdqa		xmm0, [esi]
+	movdqa		xmm4, [esi+16]
+	movdqa		xmm1, [esi+edx]
+	movdqa		xmm5, [esi+edx+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6
+	; xmm3 and xmm5 are copies of the mask; pandn turns them into (old destination AND NOT mask)
+	movdqa		xmm3, xmm7
+	pshufd		xmm5, xmm7, 0e4h
+	; each store below writes (new AND mask) OR (old AND NOT mask)
+	pandn		xmm3, [edi+16*0]
+	pand		xmm0, xmm7
+	por			xmm0, xmm3
+	movdqa		[edi+16*0], xmm0
+
+	pandn		xmm5, [edi+16*1]
+	pand		xmm2, xmm7
+	por			xmm2, xmm5
+	movdqa		[edi+16*1], xmm2
+
+	movdqa		xmm3, xmm7
+	pshufd		xmm5, xmm7, 0e4h
+
+	pandn		xmm3, [edi+16*2]
+	pand		xmm4, xmm7
+	por			xmm4, xmm3
+	movdqa		[edi+16*2], xmm4
+
+	pandn		xmm5, [edi+16*3]
+	pand		xmm6, xmm7
+	por			xmm6, xmm5
+	movdqa		[edi+16*3], xmm6
+
+	lea			esi, [esi+edx*2]
+	add			edi, 64
+
+	dec			ecx
+	jnz			@B
+
+	pop			edi
+	pop			esi
+
+	ret			8
+	
+@SwizzleBlock32_sse2@16 endp
+
+;
+; SwizzleBlock16
+;
+
+@SwizzleBlock16_sse2@12 proc public
+	; ecx = destination (4 x 64 bytes), edx = source, stack argument = source row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 4				; 4 iterations, each consuming two 32-byte source rows
+
+	align 16
+@@:
+	movdqa		xmm0, [edx]
+	movdqa		xmm1, [edx+16]
+	movdqa		xmm2, [edx+ebx]
+	movdqa		xmm3, [edx+ebx+16]
+
+	punpck		wd, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 5
+
+	movdqa		[ecx+16*0], xmm0
+	movdqa		[ecx+16*1], xmm1
+	movdqa		[ecx+16*2], xmm4
+	movdqa		[ecx+16*3], xmm5
+
+	lea			edx, [edx+ebx*2]
+	add			ecx, 64
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@SwizzleBlock16_sse2@12 endp
+
+;
+; SwizzleBlock8
+;
+
+@SwizzleBlock8_sse2@12 proc public
+	; ecx = destination (2 x 128 bytes), edx = source, stack argument = source row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 2				; 2 iterations, each consuming eight 16-byte source rows
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [edx]
+	movdqa		xmm2, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+	; third and fourth rows are loaded with the dwords of each qword swapped (0b1h)
+	pshufd		xmm1, [edx], 0b1h
+	pshufd		xmm3, [edx+ebx], 0b1h
+	lea			edx, [edx+ebx*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[ecx+16*0], xmm0
+	movdqa		[ecx+16*1], xmm4
+	movdqa		[ecx+16*2], xmm1
+	movdqa		[ecx+16*3], xmm5
+
+	; col 1, 3
+
+	pshufd		xmm0, [edx], 0b1h
+	pshufd		xmm2, [edx+ebx], 0b1h
+	lea			edx, [edx+ebx*2]
+	; for these columns the swap is on the first and second rows; the next two are loaded unchanged
+	movdqa		xmm1, [edx]
+	movdqa		xmm3, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[ecx+16*4], xmm0
+	movdqa		[ecx+16*5], xmm4
+	movdqa		[ecx+16*6], xmm1
+	movdqa		[ecx+16*7], xmm5
+
+	add			ecx, 128
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+	
+@SwizzleBlock8_sse2@12 endp
+
+;
+; SwizzleBlock4
+;
+
+@SwizzleBlock4_sse2@12 proc public
+	; ecx = destination (2 x 128 bytes), edx = source, stack argument = source row pitch in bytes.
+	push		ebx
+	; xmm7 = 0f0f0f0fh in every dword: the low-nibble mask that the punpcknb macro reads
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 2				; 2 iterations, each consuming eight 16-byte source rows
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [edx]
+	movdqa		xmm2, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+
+	movdqa		xmm1, [edx]
+	movdqa		xmm3, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+	; swap adjacent words in the third and fourth rows
+	pshuflw		xmm1, xmm1, 0b1h
+	pshuflw		xmm3, xmm3, 0b1h
+	pshufhw		xmm1, xmm1, 0b1h
+	pshufhw		xmm3, xmm3, 0b1h
+
+	punpcknb
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[ecx+16*0], xmm0
+	movdqa		[ecx+16*1], xmm1
+	movdqa		[ecx+16*2], xmm4
+	movdqa		[ecx+16*3], xmm3
+
+	; col 1, 3
+
+	movdqa		xmm0, [edx]
+	movdqa		xmm2, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+
+	movdqa		xmm1, [edx]
+	movdqa		xmm3, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+	; for these columns the adjacent-word swap is on the first and second rows
+	pshuflw		xmm0, xmm0, 0b1h
+	pshuflw		xmm2, xmm2, 0b1h
+	pshufhw		xmm0, xmm0, 0b1h
+	pshufhw		xmm2, xmm2, 0b1h
+
+	punpcknb
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[ecx+16*4], xmm0
+	movdqa		[ecx+16*5], xmm1
+	movdqa		[ecx+16*6], xmm4
+	movdqa		[ecx+16*7], xmm3
+
+	add			ecx, 128
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@SwizzleBlock4_sse2@12 endp
+
+;
+; swizzling with unaligned reads
+;
+
+;
+; SwizzleBlock32u
+;
+
+@SwizzleBlock32u_sse2@16 proc public
+	; ecx = destination (256 bytes), edx = source, stack arguments = source row pitch in bytes, 32-bit write mask. Source rows are read with movdqu (no alignment needed); destination accesses stay movdqa.
+	push		esi
+	push		edi
+
+	mov			edi, ecx
+	mov			esi, edx
+	mov			edx, [esp+4+8]		; pitch
+	mov			ecx, 4				; 4 iterations, each consuming two 32-byte source rows
+
+	mov			eax, [esp+8+8]		; write mask
+	cmp			eax, 0ffffffffh
+	jnz			SwizzleBlock32u_sse2@WM
+
+	align 16
+@@:
+	movdqu		xmm0, [esi]
+	movdqu		xmm4, [esi+16]
+	movdqu		xmm1, [esi+edx]
+	movdqu		xmm5, [esi+edx+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6
+
+	movdqa		[edi+16*0], xmm0
+	movdqa		[edi+16*1], xmm2
+	movdqa		[edi+16*2], xmm4
+	movdqa		[edi+16*3], xmm6
+
+	lea			esi, [esi+edx*2]
+	add			edi, 64
+
+	dec			ecx
+	jnz			@B
+
+	pop			edi
+	pop			esi
+
+	ret			8
+
+SwizzleBlock32u_sse2@WM:
+	; masked path: xmm7 = the mask replicated into all four dwords
+	movd		xmm7, eax
+	pshufd		xmm7, xmm7, 0
+	
+	align 16
+@@:
+	movdqu		xmm0, [esi]
+	movdqu		xmm4, [esi+16]
+	movdqu		xmm1, [esi+edx]
+	movdqu		xmm5, [esi+edx+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6
+	; xmm3 and xmm5 are copies of the mask; pandn turns them into (old destination AND NOT mask)
+	movdqa		xmm3, xmm7
+	pshufd		xmm5, xmm7, 0e4h
+	; each store below writes (new AND mask) OR (old AND NOT mask)
+	pandn		xmm3, [edi+16*0]
+	pand		xmm0, xmm7
+	por			xmm0, xmm3
+	movdqa		[edi+16*0], xmm0
+
+	pandn		xmm5, [edi+16*1]
+	pand		xmm2, xmm7
+	por			xmm2, xmm5
+	movdqa		[edi+16*1], xmm2
+
+	movdqa		xmm3, xmm7
+	pshufd		xmm5, xmm7, 0e4h
+
+	pandn		xmm3, [edi+16*2]
+	pand		xmm4, xmm7
+	por			xmm4, xmm3
+	movdqa		[edi+16*2], xmm4
+
+	pandn		xmm5, [edi+16*3]
+	pand		xmm6, xmm7
+	por			xmm6, xmm5
+	movdqa		[edi+16*3], xmm6
+
+	lea			esi, [esi+edx*2]
+	add			edi, 64
+
+	dec			ecx
+	jnz			@B
+
+	pop			edi
+	pop			esi
+
+	ret			8
+	
+@SwizzleBlock32u_sse2@16 endp
+
+;
+; SwizzleBlock16u
+;
+
+@SwizzleBlock16u_sse2@12 proc public
+	; ecx = destination (4 x 64 bytes, movdqa), edx = source (movdqu, no alignment needed), stack argument = source row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 4				; 4 iterations, each consuming two 32-byte source rows
+
+	align 16
+@@:
+	movdqu		xmm0, [edx]
+	movdqu		xmm1, [edx+16]
+	movdqu		xmm2, [edx+ebx]
+	movdqu		xmm3, [edx+ebx+16]
+
+	punpck		wd, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 5
+
+	movdqa		[ecx+16*0], xmm0
+	movdqa		[ecx+16*1], xmm1
+	movdqa		[ecx+16*2], xmm4
+	movdqa		[ecx+16*3], xmm5
+
+	lea			edx, [edx+ebx*2]
+	add			ecx, 64
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@SwizzleBlock16u_sse2@12 endp
+
+;
+; SwizzleBlock8u
+;
+
+@SwizzleBlock8u_sse2@12 proc public
+	; ecx = destination (2 x 128 bytes, movdqa), edx = source (movdqu, no alignment needed), stack argument = source row pitch in bytes.
+	push		ebx
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 2				; 2 iterations, each consuming eight 16-byte source rows
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqu		xmm0, [edx]
+	movdqu		xmm2, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+	; load first, then shuffle in registers: pshufd with a memory operand would require alignment
+	movdqu		xmm1, [edx]
+	movdqu		xmm3, [edx+ebx]
+	pshufd		xmm1, xmm1, 0b1h
+	pshufd		xmm3, xmm3, 0b1h
+	lea			edx, [edx+ebx*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[ecx+16*0], xmm0
+	movdqa		[ecx+16*1], xmm4
+	movdqa		[ecx+16*2], xmm1
+	movdqa		[ecx+16*3], xmm5
+
+	; col 1, 3
+
+	movdqu		xmm0, [edx]
+	movdqu		xmm2, [edx+ebx]
+	pshufd		xmm0, xmm0, 0b1h
+	pshufd		xmm2, xmm2, 0b1h
+	lea			edx, [edx+ebx*2]
+
+	movdqu		xmm1, [edx]
+	movdqu		xmm3, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[ecx+16*4], xmm0
+	movdqa		[ecx+16*5], xmm4
+	movdqa		[ecx+16*6], xmm1
+	movdqa		[ecx+16*7], xmm5
+
+	add			ecx, 128
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+	
+@SwizzleBlock8u_sse2@12 endp
+
+;
+; SwizzleBlock4u
+;
+
+@SwizzleBlock4u_sse2@12 proc public
+	; ecx = destination (2 x 128 bytes, movdqa), edx = source (movdqu, no alignment needed), stack argument = source row pitch in bytes.
+	push		ebx
+	; xmm7 = 0f0f0f0fh in every dword: the low-nibble mask that the punpcknb macro reads
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0
+
+	mov			ebx, [esp+4+4]		; pitch
+	mov			eax, 2				; 2 iterations, each consuming eight 16-byte source rows
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqu		xmm0, [edx]
+	movdqu		xmm2, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+
+	movdqu		xmm1, [edx]
+	movdqu		xmm3, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+	; swap adjacent words in the third and fourth rows
+	pshuflw		xmm1, xmm1, 0b1h
+	pshuflw		xmm3, xmm3, 0b1h
+	pshufhw		xmm1, xmm1, 0b1h
+	pshufhw		xmm3, xmm3, 0b1h
+
+	punpcknb
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[ecx+16*0], xmm0
+	movdqa		[ecx+16*1], xmm1
+	movdqa		[ecx+16*2], xmm4
+	movdqa		[ecx+16*3], xmm3
+
+	; col 1, 3
+
+	movdqu		xmm0, [edx]
+	movdqu		xmm2, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+
+	movdqu		xmm1, [edx]
+	movdqu		xmm3, [edx+ebx]
+	lea			edx, [edx+ebx*2]
+	; for these columns the adjacent-word swap is on the first and second rows
+	pshuflw		xmm0, xmm0, 0b1h
+	pshuflw		xmm2, xmm2, 0b1h
+	pshufhw		xmm0, xmm0, 0b1h
+	pshufhw		xmm2, xmm2, 0b1h
+
+	punpcknb
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[ecx+16*4], xmm0
+	movdqa		[ecx+16*5], xmm1
+	movdqa		[ecx+16*6], xmm4
+	movdqa		[ecx+16*7], xmm3
+
+	add			ecx, 128
+
+	dec			eax
+	jnz			@B
+
+	pop			ebx
+
+	ret			4
+
+@SwizzleBlock4u_sse2@12 endp
+
+	end
\ No newline at end of file
diff --git a/gsdx/x86-64.asm b/gsdx/x86-64.asm
new file mode 100644
index 0000000..70e6d94
--- /dev/null
+++ b/gsdx/x86-64.asm
@@ -0,0 +1,1422 @@
+
+	.const
+
+	__uvmin DD 0d01502f9r ; -1e+010 (single-precision bit pattern in MASM hex-real notation)
+	__uvmax DD 0501502f9r ; +1e+010 (single-precision bit pattern in MASM hex-real notation)
+
+	.code
+	
+;
+; memsetd
+;
+
+memsetd proc public
+	; Stores the 32-bit value in edx to r8 consecutive dwords starting at rcx.
+	push	rdi						; preserved for the caller, restored below
+	cld								; direction flag clear: stosd advances rdi upwards
+	mov		eax, edx				; stosd stores eax
+	mov		rdi, rcx				; stosd stores to [rdi]
+	mov		rcx, r8					; repeat count; all 64 bits of r8 are used
+	rep		stosd
+
+	pop		rdi
+
+	ret
+
+
+memsetd endp
+
+;
+; SaturateColor
+;
+
+SaturateColor_amd64 proc public
+	; Clamps the four signed dwords at [rcx] to 0..255 in place; movdqa requires [rcx] to be 16-byte aligned.
+	pxor		xmm0, xmm0			; xmm0 = 0, used as packing filler and for zero extension
+	movdqa		xmm1, [rcx]
+	packssdw	xmm1, xmm0			; dwords -> words, signed saturation
+	packuswb	xmm1, xmm0			; words -> bytes, unsigned saturation (this is the 0..255 clamp)
+	punpcklbw	xmm1, xmm0			; bytes -> words, zero extended
+	punpcklwd	xmm1, xmm0			; words -> dwords, zero extended
+	movdqa		[rcx], xmm1
+
+	ret
+
+SaturateColor_amd64 endp
+
+;
+; swizzling
+;
+
+punpck macro op, sd0, sd2, s1, s3, d1, d3
+	;; Arguments are xmm register numbers; op is the punpckl/punpckh suffix (bw, wd, dq, qdq).
+	movdqa					@CatStr(xmm, %d1),	@CatStr(xmm, %sd0)
+	pshufd					@CatStr(xmm, %d3),	@CatStr(xmm, %sd2), 0e4h
+	;; (pshufd with 0e4h is a plain copy.) Results: sd0 = low(sd0,s1), d1 = high(sd0,s1), sd2 = low(sd2,s3), d3 = high(sd2,s3).
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd0),	@CatStr(xmm, %s1)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d1),	@CatStr(xmm, %s1)
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd2),	@CatStr(xmm, %s3)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d3),	@CatStr(xmm, %s3)
+
+	endm
+
+punpck2 macro op, sd0, sd2, sd4, sd6, s1, s3, s5, s7, d1, d3, d5, d7
+	;; Four-pair version of punpck: each sdN ends up as low(sdN, sN+1) and each dN+1 as high(sdN, sN+1).
+	movdqa					@CatStr(xmm, %d1),	@CatStr(xmm, %sd0)
+	pshufd					@CatStr(xmm, %d3),	@CatStr(xmm, %sd2), 0e4h
+	movdqa					@CatStr(xmm, %d5),	@CatStr(xmm, %sd4)
+	pshufd					@CatStr(xmm, %d7),	@CatStr(xmm, %sd6), 0e4h
+	;; the four copies above (pshufd with 0e4h is a plain copy) preserve the inputs for the high-half interleaves
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd0),	@CatStr(xmm, %s1)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d1),	@CatStr(xmm, %s1)
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd2),	@CatStr(xmm, %s3)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d3),	@CatStr(xmm, %s3)
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd4),	@CatStr(xmm, %s5)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d5),	@CatStr(xmm, %s5)
+	@CatStr(punpckl, op)	@CatStr(xmm, %sd6),	@CatStr(xmm, %s7)
+	@CatStr(punpckh, op)	@CatStr(xmm, %d7),	@CatStr(xmm, %s7)
+
+	endm
+
+punpcknbl macro
+	;; Nibble interleave of xmm0 with xmm1 and of xmm2 with xmm3, followed by a byte interleave (low register bank, xmm0-xmm7).
+	movdqa	xmm4, xmm0
+	pshufd	xmm5, xmm1, 0e4h
+	;; xmm7 is read as the nibble mask; it is not set here, the expanding routine must have loaded it
+	psllq	xmm1, 4
+	psrlq	xmm4, 4
+	;; xmm0 = low nibbles of xmm0, with the low nibbles of xmm1 placed in the high nibble of each byte
+	movdqa	xmm6, xmm7
+	pand	xmm0, xmm7
+	pandn	xmm6, xmm1
+	por		xmm0, xmm6
+	;; xmm4 = high nibbles of xmm0 moved to the low nibble, high nibbles of xmm1 kept in place
+	movdqa	xmm6, xmm7
+	pand	xmm4, xmm7
+	pandn	xmm6, xmm5
+	por		xmm4, xmm6
+
+	movdqa	xmm1, xmm4
+	;; same two steps for the xmm2 / xmm3 pair
+	movdqa	xmm4, xmm2
+	pshufd	xmm5, xmm3, 0e4h
+
+	psllq	xmm3, 4
+	psrlq	xmm4, 4
+
+	movdqa	xmm6, xmm7
+	pand	xmm2, xmm7
+	pandn	xmm6, xmm3
+	por		xmm2, xmm6
+
+	movdqa	xmm6, xmm7
+	pand	xmm4, xmm7
+	pandn	xmm6, xmm5
+	por		xmm4, xmm6
+
+	movdqa	xmm3, xmm4
+	;; results end up in xmm0, xmm2, xmm4, xmm6; xmm1, xmm3 and xmm5 are overwritten along the way
+	punpck	bw, 0, 2, 1, 3, 4, 6
+
+	endm
+
+punpcknbh macro
+	;; Same sequence as punpcknbl on the high register bank: xmm8-xmm14 as data and scratch, xmm15 as the nibble mask.
+	movdqa	xmm12, xmm8
+	pshufd	xmm13, xmm9, 0e4h
+	;; xmm15 is not set here, the expanding routine must have loaded it
+	psllq	xmm9, 4
+	psrlq	xmm12, 4
+	;; xmm8 = low nibbles of xmm8, with the low nibbles of xmm9 placed in the high nibble of each byte
+	movdqa	xmm14, xmm15
+	pand	xmm8, xmm15
+	pandn	xmm14, xmm9
+	por		xmm8, xmm14
+	;; xmm12 = high nibbles of xmm8 moved to the low nibble, high nibbles of xmm9 kept in place
+	movdqa	xmm14, xmm15
+	pand	xmm12, xmm15
+	pandn	xmm14, xmm13
+	por		xmm12, xmm14
+
+	movdqa	xmm9, xmm12
+	;; same two steps for the xmm10 / xmm11 pair
+	movdqa	xmm12, xmm10
+	pshufd	xmm13, xmm11, 0e4h
+
+	psllq	xmm11, 4
+	psrlq	xmm12, 4
+
+	movdqa	xmm14, xmm15
+	pand	xmm10, xmm15
+	pandn	xmm14, xmm11
+	por		xmm10, xmm14
+
+	movdqa	xmm14, xmm15
+	pand	xmm12, xmm15
+	pandn	xmm14, xmm13
+	por		xmm12, xmm14
+
+	movdqa	xmm11, xmm12
+	;; results end up in xmm8, xmm10, xmm12, xmm14; xmm9, xmm11 and xmm13 are overwritten along the way
+	punpck	bw, 8, 10, 9, 11, 12, 14
+
+	endm
+	
+;
+; unSwizzleBlock32
+;
+
+unSwizzleBlock32_amd64 proc public
+	; rcx = source (4 x 64 bytes, movdqa), rdx = destination, r8 = destination row pitch in bytes. Writes 8 rows of 32 bytes.
+	push		rsi
+	push		rdi
+
+	mov			rsi, rcx
+	mov			rdi, rdx
+	mov			rcx, 4				; 4 iterations, each producing two 32-byte rows
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm2, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+	; xmm5, not xmm6, is the second temporary: the Microsoft x64 calling convention treats xmm6-xmm15 as nonvolatile and nothing here saves them
+	punpck		qdq, 0, 2, 1, 3, 4, 5
+
+	movdqa		[rdi], xmm0
+	movdqa		[rdi+16], xmm2
+	movdqa		[rdi+r8], xmm4
+	movdqa		[rdi+r8+16], xmm5
+
+	add			rsi, 64
+	lea			rdi, [rdi+r8*2]
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+unSwizzleBlock32_amd64 endp
+
+;
+; unSwizzleBlock32_2 (TODO: test me)
+;
+
+unSwizzleBlock32_2_amd64 proc public
+	; rcx = source (2 x 128 bytes, movdqa), rdx = destination, r8 = destination row pitch in bytes. Writes 8 rows of 32 bytes.
+	push		rsi
+	push		rdi
+	; NOTE(review): xmm6-xmm14 are overwritten without being saved although the Microsoft x64 calling convention treats them as nonvolatile; fixing that needs spill code.
+	mov			rsi, rcx
+	mov			rdi, rdx
+	mov			rcx, 2				; 2 iterations, each producing four 32-byte rows
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm2, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+	movdqa		xmm4, [rsi+16*4]
+	movdqa		xmm5, [rsi+16*5]
+	movdqa		xmm6, [rsi+16*6]
+	movdqa		xmm7, [rsi+16*7]
+	; after the macro: xmm0/2/4/6 hold the low-qword interleaves, xmm8/10/12/14 the matching high-qword interleaves
+	punpck2		qdq, 0, 2, 4, 6, 1, 3, 5, 7, 8, 10, 12, 14
+	; first 64 source bytes: low halves form one row, high halves the next (same layout as unSwizzleBlock32_amd64)
+	movdqa		[rdi], xmm0
+	movdqa		[rdi+16], xmm2
+	movdqa		[rdi+r8], xmm8
+	movdqa		[rdi+r8+16], xmm10
+	lea			rdi, [rdi+r8*2]
+	; second 64 source bytes
+	movdqa		[rdi], xmm4
+	movdqa		[rdi+16], xmm6
+	movdqa		[rdi+r8], xmm12
+	movdqa		[rdi+r8+16], xmm14
+	lea			rdi, [rdi+r8*2]
+
+	add			rsi, 128
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+unSwizzleBlock32_2_amd64 endp
+
+;
+; unSwizzleBlock16
+;
+
+unSwizzleBlock16_amd64 proc public
+	; rcx = source (4 x 64 bytes, movdqa), rdx = destination, r8 = destination row pitch in bytes. Writes 8 rows of 32 bytes.
+	push		rsi
+	push		rdi
+
+	mov			rsi, rcx
+	mov			rdi, rdx
+	mov			rcx, 4				; 4 iterations, each producing two 32-byte rows
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm2, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+	; only xmm0-xmm5 are used: the Microsoft x64 calling convention treats xmm6-xmm15 as nonvolatile and nothing here saves them
+	punpck		wd, 0, 2, 1, 3, 4, 5
+	punpck		dq, 0, 4, 2, 5, 1, 3
+	punpck		wd, 0, 4, 1, 3, 2, 5
+
+	movdqa		[rdi], xmm0
+	movdqa		[rdi+16], xmm2
+	movdqa		[rdi+r8], xmm4
+	movdqa		[rdi+r8+16], xmm5
+
+	add			rsi, 64
+	lea			rdi, [rdi+r8*2]
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+	
+unSwizzleBlock16_amd64 endp
+
+;
+; unSwizzleBlock8
+;
+
+unSwizzleBlock8_amd64 proc public
+	; rcx = source (2 x 128 bytes, movdqa), rdx = destination, r8 = destination row pitch in bytes. Writes 16 rows of 16 bytes.
+	push		rsi
+	push		rdi
+	; NOTE(review): xmm6 and xmm8-xmm14 are overwritten without being saved; the Microsoft x64 calling convention treats xmm6-xmm15 as nonvolatile - confirm callers tolerate this or add spill code.
+	mov			rsi, rcx
+	mov			rdi, rdx
+	mov			rcx, 2
+	
+	; r9 = r8*3
+	lea			r9, [r8*2]
+	add			r9, r8
+
+	align 16
+@@:
+	; col 0, 2
+	
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm4, [rsi+16*2]
+	movdqa		xmm5, [rsi+16*3]
+	
+	; col 1, 3
+
+	movdqa		xmm8, [rsi+16*4]
+	movdqa		xmm9, [rsi+16*5]
+	movdqa		xmm12, [rsi+16*6]
+	movdqa		xmm13, [rsi+16*7]
+
+	; col 0, 2
+	
+	punpck		bw,  0, 4, 1, 5, 2, 6
+	punpck		wd,  0, 2, 4, 6, 1, 3
+	punpck		bw,  0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 2, 4, 6, 1, 3
+	; 0b1h swaps the two dwords inside each qword (third and fourth rows for these columns)
+	pshufd		xmm1, xmm1, 0b1h
+	pshufd		xmm3, xmm3, 0b1h
+	
+	; col 1, 3
+
+	punpck		bw,  8, 12, 9, 13, 10, 14
+	punpck		wd,  8, 10, 12, 14, 9, 11
+	punpck		bw,  8, 10, 9, 11, 12, 14
+	punpck		qdq, 8, 10, 12, 14, 9, 11
+	; first and second rows for these columns
+	pshufd		xmm8, xmm8, 0b1h
+	pshufd		xmm10, xmm10, 0b1h
+
+	; col 0, 2
+	
+	movdqa		[rdi], xmm0
+	movdqa		[rdi+r8], xmm2
+	movdqa		[rdi+r8*2], xmm1
+	movdqa		[rdi+r9], xmm3
+	lea			rdi, [rdi+r8*4]
+
+	; col 1, 3
+
+	movdqa		[rdi], xmm8
+	movdqa		[rdi+r8], xmm10
+	movdqa		[rdi+r8*2], xmm9
+	movdqa		[rdi+r9], xmm11
+	lea			rdi, [rdi+r8*4]
+
+	add			rsi, 128
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+	
+	ret
+
+unSwizzleBlock8_amd64 endp
+
+; unSwizzleBlock4_amd64: rcx = src (swizzled block), rdx = dst, r8 = dst row pitch in bytes.
+; Runs 2 passes; each pass reads 128 bytes of src and writes 16 bytes to each of eight consecutive dst rows.
+; NOTE(review): xmm6-xmm15 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+unSwizzleBlock4_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rsi, rcx	; rsi = src
+	mov			rdi, rdx	; rdi = dst
+	mov			rcx, 2	; pass counter
+
+	; r9 = r8*3
+	lea			r9, [r8*2]
+	add			r9, r8
+
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 	; 32-bit move, same form as the other 4-bit routines; only the low dword is broadcast below
+	pshufd      xmm7, xmm7, 0	; xmm7 = 0fh in every byte
+	movdqa      xmm15, xmm7	; second copy of the mask; presumably the one punpcknbh expects
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm4, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+
+	; col 1, 3
+
+	movdqa		xmm8, [rsi+16*4]
+	movdqa		xmm9, [rsi+16*5]
+	movdqa		xmm12, [rsi+16*6]
+	movdqa		xmm11, [rsi+16*7]
+
+	; col 0, 2
+
+	punpck		dq, 0, 4, 1, 3, 2, 6	; punpck / punpcknbl / punpcknbh are macros defined outside this view
+	punpck		dq, 0, 2, 4, 6, 1, 3
+	punpcknbl
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		wd, 0, 2, 1, 3, 4, 6
+
+	; col 1, 3
+
+	punpck		dq, 8, 12, 9, 11, 10, 14
+	punpck		dq, 8, 10, 12, 14, 9, 11
+	punpcknbh
+	punpck		bw, 8, 10, 12, 14, 9, 11
+	punpck		wd, 8, 10, 9, 11, 12, 14
+
+	; col 0, 2
+
+	pshufd		xmm0, xmm0, 0d8h	; 0d8h swaps the two middle dwords
+	pshufd		xmm2, xmm2, 0d8h
+	pshufd		xmm4, xmm4, 0d8h
+	pshufd		xmm6, xmm6, 0d8h
+
+	; col 1, 3
+
+	pshufd		xmm8, xmm8, 0d8h
+	pshufd		xmm10, xmm10, 0d8h
+	pshufd		xmm12, xmm12, 0d8h
+	pshufd		xmm14, xmm14, 0d8h
+
+	; col 0, 2
+
+	punpck		qdq, 0, 2, 4, 6, 1, 3
+
+	; col 1, 3
+
+	punpck		qdq, 8, 10, 12, 14, 9, 11
+
+	; col 0, 2
+
+	pshuflw		xmm1, xmm1, 0b1h	; 0b1h swaps neighbouring words (low half here, high half below)
+	pshuflw		xmm3, xmm3, 0b1h
+	pshufhw		xmm1, xmm1, 0b1h
+	pshufhw		xmm3, xmm3, 0b1h
+
+	; col 1, 3
+
+	pshuflw		xmm8, xmm8, 0b1h
+	pshuflw		xmm10, xmm10, 0b1h
+	pshufhw		xmm8, xmm8, 0b1h
+	pshufhw		xmm10, xmm10, 0b1h
+
+	; col 0, 2
+
+	movdqa		[rdi], xmm0
+	movdqa		[rdi+r8], xmm2
+	movdqa		[rdi+r8*2], xmm1
+	movdqa		[rdi+r9], xmm3
+	lea			rdi, [rdi+r8*4]	; advance dst by four rows
+
+	; col 1, 3
+
+	movdqa		[rdi], xmm8
+	movdqa		[rdi+r8], xmm10
+	movdqa		[rdi+r8*2], xmm9
+	movdqa		[rdi+r9], xmm11
+	lea			rdi, [rdi+r8*4]	; advance dst by four rows
+
+	add			rsi, 128
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+unSwizzleBlock4_amd64 endp
+
+; unSwizzleBlock8HP_amd64: rcx = src (swizzled block), rdx = dst, r8 = dst row pitch in bytes.
+; Runs 4 passes; each pass reads 64 bytes of src, keeps the top byte of every dword and writes 8 bytes to each of two dst rows.
+; NOTE(review): xmm6 is used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+unSwizzleBlock8HP_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rsi, rcx	; rsi = src
+	mov			rdi, rdx	; rdi = dst
+	mov			rcx, 4	; pass counter
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm2, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+
+	psrld		xmm0, 24	; move bits 24-31 of every dword down to bits 0-7
+	psrld		xmm2, 24
+	psrld		xmm4, 24
+	psrld		xmm6, 24
+	
+	packssdw	xmm0, xmm2	; values are 0..255 after the shift, so neither pack saturates
+	packssdw	xmm4, xmm6
+	packuswb	xmm0, xmm4
+
+	movlps		qword ptr [rdi], xmm0	; low 8 bytes -> first row
+	movhps		qword ptr [rdi+r8], xmm0	; high 8 bytes -> second row
+
+	add			rsi, 64
+	lea			rdi, [rdi+r8*2]	; advance dst by two rows
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+	
+	ret
+
+unSwizzleBlock8HP_amd64 endp
+
+; unSwizzleBlock4HLP_amd64: rcx = src (swizzled block), rdx = dst, r8 = dst row pitch in bytes.
+; Runs 4 passes; each pass reads 64 bytes of src, keeps bits 24-27 of every dword and writes 8 bytes to each of two dst rows.
+; NOTE(review): xmm6 and xmm7 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+unSwizzleBlock4HLP_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rsi, rcx	; rsi = src
+	mov			rdi, rdx	; rdi = dst
+	mov			rcx, 4	; pass counter
+
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0	; xmm7 = 0fh in every byte
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm2, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+
+	psrld		xmm0, 24	; move bits 24-31 of every dword down to bits 0-7
+	psrld		xmm2, 24
+	psrld		xmm4, 24
+	psrld		xmm6, 24
+	
+	packssdw	xmm0, xmm2	; values are 0..255 after the shift, so neither pack saturates
+	packssdw	xmm4, xmm6
+	packuswb	xmm0, xmm4
+
+	pand		xmm0, xmm7	; keep only the low nibble of every byte
+	
+	movlps		qword ptr [rdi], xmm0	; low 8 bytes -> first row
+	movhps		qword ptr [rdi+r8], xmm0	; high 8 bytes -> second row
+
+	add			rsi, 64
+	lea			rdi, [rdi+r8*2]	; advance dst by two rows
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+	
+	ret
+	
+unSwizzleBlock4HLP_amd64 endp
+
+; unSwizzleBlock4HHP_amd64: rcx = src (swizzled block), rdx = dst, r8 = dst row pitch in bytes.
+; Runs 4 passes; each pass reads 64 bytes of src, keeps bits 28-31 of every dword and writes 8 bytes to each of two dst rows.
+; NOTE(review): xmm6 is used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+unSwizzleBlock4HHP_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rsi, rcx	; rsi = src
+	mov			rdi, rdx	; rdi = dst
+	mov			rcx, 4	; pass counter
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi+16*0]
+	movdqa		xmm1, [rsi+16*1]
+	movdqa		xmm2, [rsi+16*2]
+	movdqa		xmm3, [rsi+16*3]
+
+	punpck		qdq, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+
+	psrld		xmm0, 28	; move bits 28-31 of every dword down to bits 0-3
+	psrld		xmm2, 28
+	psrld		xmm4, 28
+	psrld		xmm6, 28
+	
+	packssdw	xmm0, xmm2	; values are 0..15 after the shift, so neither pack saturates
+	packssdw	xmm4, xmm6
+	packuswb	xmm0, xmm4
+
+	movlps		qword ptr [rdi], xmm0	; low 8 bytes -> first row
+	movhps		qword ptr [rdi+r8], xmm0	; high 8 bytes -> second row
+
+	add			rsi, 64
+	lea			rdi, [rdi+r8*2]	; advance dst by two rows
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+	
+	ret
+	
+unSwizzleBlock4HHP_amd64 endp
+
+; unSwizzleBlock4P_amd64: rcx = src (swizzled block), rdx = dst, r8 = dst row pitch in bytes.
+; Four unrolled steps; each reads 64 src bytes and writes four 32-byte dst rows holding one 4-bit value (0-15) per byte.
+; NOTE(review): xmm6-xmm8 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+unSwizzleBlock4P_amd64 proc public
+
+	mov         eax, 0f0f0f0fh
+	movd        xmm8, eax 
+	pshufd      xmm8, xmm8, 0	; xmm8 = 0fh in every byte
+
+	; r9 = r8*3
+	lea			r9, [r8*2]
+	add			r9, r8
+
+	; col 0
+	
+	movdqa		xmm0, [rcx+16*0]
+	movdqa		xmm1, [rcx+16*1]
+	movdqa		xmm2, [rcx+16*2]
+	movdqa		xmm3, [rcx+16*3]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+
+	movdqa		xmm1, xmm8
+	pandn		xmm1, xmm0	; xmm1 = high nibbles of xmm0
+	pand		xmm0, xmm8	; xmm0 = low nibbles
+	pshufd		xmm1, xmm1, 0b1h	; col 0 and col 2 swap dword pairs of the high-nibble half
+	psrlq		xmm1, 4	; bring the high nibbles down to bits 0-3 of each byte
+
+	movdqa		xmm3, xmm8
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm8
+	pshufd		xmm3, xmm3, 0b1h
+	psrlq		xmm3, 4
+	
+	movdqa		xmm5, xmm8
+	pandn		xmm5, xmm4
+	pand		xmm4, xmm8
+	pshufd		xmm5, xmm5, 0b1h
+	psrlq		xmm5, 4
+
+	movdqa		xmm7, xmm8
+	pandn		xmm7, xmm6
+	pand		xmm6, xmm8
+	pshufd		xmm7, xmm7, 0b1h
+	psrlq		xmm7, 4
+
+	movdqa		[rdx], xmm0	; rows 0 and 1 of this step: low-nibble values
+	movdqa		[rdx+16], xmm2
+	movdqa		[rdx+r8], xmm4
+	movdqa		[rdx+r8+16], xmm6
+		
+	movdqa		[rdx+r8*2], xmm1	; rows 2 and 3 of this step: high-nibble values
+	movdqa		[rdx+r8*2+16], xmm3
+	movdqa		[rdx+r9], xmm5
+	movdqa		[rdx+r9+16], xmm7
+	
+	lea			rdx, [rdx+r8*4]	; advance dst by four rows
+
+	; col 1
+	
+	movdqa		xmm0, [rcx+16*4]
+	movdqa		xmm1, [rcx+16*5]
+	movdqa		xmm2, [rcx+16*6]
+	movdqa		xmm3, [rcx+16*7]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+
+	movdqa		xmm1, xmm8
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm8
+	pshufd		xmm0, xmm0, 0b1h	; col 1 and col 3 swap dword pairs of the low-nibble half instead
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm8
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm8
+	pshufd		xmm2, xmm2, 0b1h
+	psrlq		xmm3, 4
+	
+	movdqa		xmm5, xmm8
+	pandn		xmm5, xmm4
+	pand		xmm4, xmm8
+	pshufd		xmm4, xmm4, 0b1h
+	psrlq		xmm5, 4
+
+	movdqa		xmm7, xmm8
+	pandn		xmm7, xmm6
+	pand		xmm6, xmm8
+	pshufd		xmm6, xmm6, 0b1h
+	psrlq		xmm7, 4
+
+	movdqa		[rdx], xmm0
+	movdqa		[rdx+16], xmm2
+	movdqa		[rdx+r8], xmm4
+	movdqa		[rdx+r8+16], xmm6
+		
+	movdqa		[rdx+r8*2], xmm1
+	movdqa		[rdx+r8*2+16], xmm3
+	movdqa		[rdx+r9], xmm5
+	movdqa		[rdx+r9+16], xmm7
+	
+	lea			rdx, [rdx+r8*4]	; advance dst by four rows
+
+	; col 2
+	
+	movdqa		xmm0, [rcx+16*8]
+	movdqa		xmm1, [rcx+16*9]
+	movdqa		xmm2, [rcx+16*10]
+	movdqa		xmm3, [rcx+16*11]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+
+	movdqa		xmm1, xmm8
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm8
+	pshufd		xmm1, xmm1, 0b1h
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm8
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm8
+	pshufd		xmm3, xmm3, 0b1h
+	psrlq		xmm3, 4
+	
+	movdqa		xmm5, xmm8
+	pandn		xmm5, xmm4
+	pand		xmm4, xmm8
+	pshufd		xmm5, xmm5, 0b1h
+	psrlq		xmm5, 4
+
+	movdqa		xmm7, xmm8
+	pandn		xmm7, xmm6
+	pand		xmm6, xmm8
+	pshufd		xmm7, xmm7, 0b1h
+	psrlq		xmm7, 4
+
+	movdqa		[rdx], xmm0
+	movdqa		[rdx+16], xmm2
+	movdqa		[rdx+r8], xmm4
+	movdqa		[rdx+r8+16], xmm6
+		
+	movdqa		[rdx+r8*2], xmm1
+	movdqa		[rdx+r8*2+16], xmm3
+	movdqa		[rdx+r9], xmm5
+	movdqa		[rdx+r9+16], xmm7
+	
+	lea			rdx, [rdx+r8*4]	; advance dst by four rows
+
+	; col 3
+	
+	movdqa		xmm0, [rcx+16*12]
+	movdqa		xmm1, [rcx+16*13]
+	movdqa		xmm2, [rcx+16*14]
+	movdqa		xmm3, [rcx+16*15]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 4, 2, 6, 1, 3
+	punpck		bw, 0, 4, 1, 3, 2, 6
+
+	movdqa		xmm1, xmm8
+	pandn		xmm1, xmm0
+	pand		xmm0, xmm8
+	pshufd		xmm0, xmm0, 0b1h
+	psrlq		xmm1, 4
+
+	movdqa		xmm3, xmm8
+	pandn		xmm3, xmm2
+	pand		xmm2, xmm8
+	pshufd		xmm2, xmm2, 0b1h
+	psrlq		xmm3, 4
+	
+	movdqa		xmm5, xmm8
+	pandn		xmm5, xmm4
+	pand		xmm4, xmm8
+	pshufd		xmm4, xmm4, 0b1h
+	psrlq		xmm5, 4
+
+	movdqa		xmm7, xmm8
+	pandn		xmm7, xmm6
+	pand		xmm6, xmm8
+	pshufd		xmm6, xmm6, 0b1h
+	psrlq		xmm7, 4
+
+	movdqa		[rdx], xmm0
+	movdqa		[rdx+16], xmm2
+	movdqa		[rdx+r8], xmm4
+	movdqa		[rdx+r8+16], xmm6
+		
+	movdqa		[rdx+r8*2], xmm1
+	movdqa		[rdx+r8*2+16], xmm3
+	movdqa		[rdx+r9], xmm5
+	movdqa		[rdx+r9+16], xmm7
+	
+	; lea			rdx, [rdx+r8*4]
+
+	ret
+
+unSwizzleBlock4P_amd64 endp
+
+;
+; swizzling
+;
+
+; SwizzleBlock32_amd64: rcx = dst (swizzled block), rdx = src, r8 = src row pitch in bytes, r9d = write mask.
+; Runs 4 passes; each pass reads 32 bytes from each of two src rows and writes 64 dst bytes. A mask other than
+; 0ffffffffh takes the @WM path, which stores (dst and not mask) or (src and mask) for every dword.
+
+SwizzleBlock32_amd64 proc public
+; NOTE(review): xmm6-xmm11 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+	push		rsi
+	push		rdi
+
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			rcx, 4	; pass counter
+
+	cmp			r9d, 0ffffffffh
+	jnz			SwizzleBlock32_amd64@WM
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi]
+	movdqa		xmm4, [rsi+16]
+	movdqa		xmm1, [rsi+r8]
+	movdqa		xmm5, [rsi+r8+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm2
+	movdqa		[rdi+16*2], xmm4
+	movdqa		[rdi+16*3], xmm6
+
+	lea			rsi, [rsi+r8*2]	; advance src by two rows
+	add			rdi, 64
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+SwizzleBlock32_amd64@WM:
+
+	movd		xmm7, r9d
+	pshufd		xmm7, xmm7, 0	; xmm7 = write mask in every dword
+	
+	align 16
+@@:
+	movdqa		xmm0, [rsi]
+	movdqa		xmm4, [rsi+16]
+	movdqa		xmm1, [rsi+r8]
+	movdqa		xmm5, [rsi+r8+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6
+
+	movdqa		xmm3, xmm7	; four scratch copies of the mask (0e4h is the identity shuffle)
+	pshufd		xmm5, xmm7, 0e4h
+	movdqa		xmm9, xmm7
+	pshufd		xmm11, xmm7, 0e4h
+
+	pandn		xmm3, [rdi+16*0]	; dst bits outside the mask
+	pand		xmm0, xmm7	; src bits inside the mask
+	por			xmm0, xmm3
+	movdqa		[rdi+16*0], xmm0
+
+	pandn		xmm5, [rdi+16*1]
+	pand		xmm2, xmm7
+	por			xmm2, xmm5
+	movdqa		[rdi+16*1], xmm2
+
+	pandn		xmm9, [rdi+16*2]
+	pand		xmm4, xmm7
+	por			xmm4, xmm9
+	movdqa		[rdi+16*2], xmm4
+
+	pandn		xmm11, [rdi+16*3]
+	pand		xmm6, xmm7
+	por			xmm6, xmm11
+	movdqa		[rdi+16*3], xmm6	; must address through rdi: an edi base would drop the upper 32 bits of the pointer
+
+	lea			rsi, [rsi+r8*2]	; advance src by two rows
+	add			rdi, 64
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+	
+SwizzleBlock32_amd64 endp
+
+; SwizzleBlock16_amd64: rcx = dst (swizzled block), rdx = src, r8 = src row pitch in bytes.
+; Runs 4 passes; each pass reads 32 bytes from each of two src rows (aligned loads) and writes 64 dst bytes.
+; NOTE(review): xmm6 is used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+SwizzleBlock16_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			rcx, 4	; pass counter
+
+	align 16
+@@:
+	movdqa		xmm0, [rsi]
+	movdqa		xmm1, [rsi+16]
+	movdqa		xmm2, [rsi+r8]
+	movdqa		xmm3, [rsi+r8+16]
+
+	punpck		wd, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+	punpck		qdq, 0, 4, 2, 6, 1, 5
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm1
+	movdqa		[rdi+16*2], xmm4
+	movdqa		[rdi+16*3], xmm5
+
+	lea			rsi, [rsi+r8*2]	; advance src by two rows
+	add			rdi, 64
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+	
+SwizzleBlock16_amd64 endp
+
+; SwizzleBlock8_amd64: rcx = dst (swizzled block), rdx = src, r8 = src row pitch in bytes.
+; Runs 2 passes; each pass reads 16 bytes from each of eight src rows (aligned) and writes 128 dst bytes.
+; NOTE(review): xmm6 is used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+SwizzleBlock8_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			ecx, 2	; pass counter (a 32-bit write zero-extends into rcx)
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [rsi]
+	movdqa		xmm2, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	pshufd		xmm1, [rsi], 0b1h	; load with the two dwords of each 64-bit half swapped
+	pshufd		xmm3, [rsi+r8], 0b1h
+	lea			rsi, [rsi+r8*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm4
+	movdqa		[rdi+16*2], xmm1
+	movdqa		[rdi+16*3], xmm5
+
+	; col 1, 3
+
+	pshufd		xmm0, [rsi], 0b1h	; here the first row pair is the one loaded dword-swapped
+	pshufd		xmm2, [rsi+r8], 0b1h
+	lea			rsi, [rsi+r8*2]
+
+	movdqa		xmm1, [rsi]
+	movdqa		xmm3, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[rdi+16*4], xmm0
+	movdqa		[rdi+16*5], xmm4
+	movdqa		[rdi+16*6], xmm1
+	movdqa		[rdi+16*7], xmm5
+
+	add			rdi, 128	; full 64-bit add: writing edi would clear the upper 32 bits of the dst pointer
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+SwizzleBlock8_amd64 endp
+
+; SwizzleBlock4_amd64: rcx = dst (swizzled block), rdx = src, r8 = src row pitch in bytes.
+; Runs 2 passes; each pass reads 16 bytes from each of eight src rows (aligned) and writes 128 dst bytes.
+; NOTE(review): xmm6 and xmm7 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+SwizzleBlock4_amd64 proc public
+
+	push		rsi
+	push		rdi
+	
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			rcx, 2	; pass counter
+
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0	; xmm7 = 0fh in every byte; presumably the mask used by punpcknbl
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqa		xmm0, [rsi]
+	movdqa		xmm2, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	movdqa		xmm1, [rsi]
+	movdqa		xmm3, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	pshuflw		xmm1, xmm1, 0b1h	; 0b1h swaps neighbouring words (low half here, high half below)
+	pshuflw		xmm3, xmm3, 0b1h
+	pshufhw		xmm1, xmm1, 0b1h
+	pshufhw		xmm3, xmm3, 0b1h
+
+	punpcknbl	; punpcknbl and punpck are macros defined outside this view
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm1
+	movdqa		[rdi+16*2], xmm4
+	movdqa		[rdi+16*3], xmm3
+
+	; col 1, 3
+
+	movdqa		xmm0, [rsi]
+	movdqa		xmm2, [rsi+r8]
+	lea			rsi, [rsi+r8*2]	; full 64-bit destination: writing esi would clear the upper 32 bits of the src pointer
+
+	movdqa		xmm1, [rsi]
+	movdqa		xmm3, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	pshuflw		xmm0, xmm0, 0b1h	; here the first row pair gets the word swap
+	pshuflw		xmm2, xmm2, 0b1h
+	pshufhw		xmm0, xmm0, 0b1h
+	pshufhw		xmm2, xmm2, 0b1h
+
+	punpcknbl
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[rdi+16*4], xmm0
+	movdqa		[rdi+16*5], xmm1
+	movdqa		[rdi+16*6], xmm4
+	movdqa		[rdi+16*7], xmm3
+
+	add			rdi, 128
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+SwizzleBlock4_amd64 endp
+
+;
+; swizzling with unaligned reads
+;
+
+; SwizzleBlock32u_amd64: rcx = dst (swizzled block), rdx = src (read with movdqu), r8 = src row pitch in bytes, r9d = write mask.
+; Runs 4 passes; each pass reads 32 bytes from each of two src rows and writes 64 dst bytes. A mask other than
+; 0ffffffffh takes the @WM path, which stores (dst and not mask) or (src and mask) for every dword.
+
+SwizzleBlock32u_amd64 proc public
+; NOTE(review): xmm6-xmm11 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+	push		rsi
+	push		rdi
+
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			rcx, 4	; pass counter
+
+	cmp			r9d, 0ffffffffh
+	jnz			SwizzleBlock32u_amd64@WM
+
+	align 16
+@@:
+	movdqu		xmm0, [rsi]
+	movdqu		xmm4, [rsi+16]
+	movdqu		xmm1, [rsi+r8]
+	movdqu		xmm5, [rsi+r8+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+
+	movdqa		[rdi+16*0], xmm0	; dst is still written with aligned stores
+	movdqa		[rdi+16*1], xmm2
+	movdqa		[rdi+16*2], xmm4
+	movdqa		[rdi+16*3], xmm6
+
+	lea			rsi, [rsi+r8*2]	; advance src by two rows
+	add			rdi, 64
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+SwizzleBlock32u_amd64@WM:
+
+	movd		xmm7, r9d
+	pshufd		xmm7, xmm7, 0	; xmm7 = write mask in every dword
+	
+	align 16
+@@:
+	movdqu		xmm0, [rsi]
+	movdqu		xmm4, [rsi+16]
+	movdqu		xmm1, [rsi+r8]
+	movdqu		xmm5, [rsi+r8+16]
+
+	punpck		qdq, 0, 4, 1, 5, 2, 6
+
+	movdqa		xmm3, xmm7	; four scratch copies of the mask (0e4h is the identity shuffle)
+	pshufd		xmm5, xmm7, 0e4h
+	movdqa		xmm9, xmm7
+	pshufd		xmm11, xmm7, 0e4h
+
+	pandn		xmm3, [rdi+16*0]	; dst bits outside the mask
+	pand		xmm0, xmm7	; src bits inside the mask
+	por			xmm0, xmm3
+	movdqa		[rdi+16*0], xmm0
+
+	pandn		xmm5, [rdi+16*1]
+	pand		xmm2, xmm7
+	por			xmm2, xmm5
+	movdqa		[rdi+16*1], xmm2
+
+	pandn		xmm9, [rdi+16*2]
+	pand		xmm4, xmm7
+	por			xmm4, xmm9
+	movdqa		[rdi+16*2], xmm4
+
+	pandn		xmm11, [rdi+16*3]
+	pand		xmm6, xmm7
+	por			xmm6, xmm11
+	movdqa		[rdi+16*3], xmm6	; must address through rdi: an edi base would drop the upper 32 bits of the pointer
+
+	lea			rsi, [rsi+r8*2]	; advance src by two rows
+	add			rdi, 64
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+	
+SwizzleBlock32u_amd64 endp
+
+; SwizzleBlock16u_amd64: rcx = dst (swizzled block), rdx = src (read with movdqu), r8 = src row pitch in bytes.
+; Runs 4 passes; each pass reads 32 bytes from each of two src rows and writes 64 dst bytes with aligned stores.
+; NOTE(review): xmm6 is used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+SwizzleBlock16u_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			rcx, 4	; pass counter
+
+	align 16
+@@:
+	movdqu		xmm0, [rsi]
+	movdqu		xmm1, [rsi+16]
+	movdqu		xmm2, [rsi+r8]
+	movdqu		xmm3, [rsi+r8+16]
+
+	punpck		wd, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+	punpck		qdq, 0, 4, 2, 6, 1, 5
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm1
+	movdqa		[rdi+16*2], xmm4
+	movdqa		[rdi+16*3], xmm5
+
+	lea			rsi, [rsi+r8*2]	; advance src by two rows
+	add			rdi, 64
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+	
+SwizzleBlock16u_amd64 endp
+
+; SwizzleBlock8u_amd64: rcx = dst (swizzled block), rdx = src (read with movdqu), r8 = src row pitch in bytes.
+; Runs 2 passes; each pass reads 16 bytes from each of eight src rows and writes 128 dst bytes with aligned stores.
+; NOTE(review): xmm6 is used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+SwizzleBlock8u_amd64 proc public
+
+	push		rsi
+	push		rdi
+
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			ecx, 2	; pass counter (a 32-bit write zero-extends into rcx)
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqu		xmm0, [rsi]
+	movdqu		xmm2, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	movdqu		xmm1, [rsi]
+	movdqu		xmm3, [rsi+r8]
+	pshufd		xmm1, xmm1, 0b1h	; 0b1h swaps the two dwords inside each 64-bit half
+	pshufd		xmm3, xmm3, 0b1h
+	lea			rsi, [rsi+r8*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6	; punpck is a macro defined outside this view; presumably interleave steps on the numbered xmm registers
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm4
+	movdqa		[rdi+16*2], xmm1
+	movdqa		[rdi+16*3], xmm5
+
+	; col 1, 3
+
+	movdqu		xmm0, [rsi]
+	movdqu		xmm2, [rsi+r8]
+	pshufd		xmm0, xmm0, 0b1h	; here the first row pair gets the dword swap
+	pshufd		xmm2, xmm2, 0b1h
+	lea			rsi, [rsi+r8*2]
+
+	movdqu		xmm1, [rsi]
+	movdqu		xmm3, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		wd, 0, 2, 4, 6, 1, 3
+	punpck		qdq, 0, 1, 2, 3, 4, 5
+
+	movdqa		[rdi+16*4], xmm0
+	movdqa		[rdi+16*5], xmm4
+	movdqa		[rdi+16*6], xmm1
+	movdqa		[rdi+16*7], xmm5
+
+	add			rdi, 128	; full 64-bit add: writing edi would clear the upper 32 bits of the dst pointer
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+SwizzleBlock8u_amd64 endp
+
+; SwizzleBlock4u_amd64: rcx = dst (swizzled block), rdx = src (read with movdqu), r8 = src row pitch in bytes.
+; Runs 2 passes; each pass reads 16 bytes from each of eight src rows and writes 128 dst bytes with aligned stores.
+; NOTE(review): xmm6 and xmm7 are used without save/restore (callee-saved in the Microsoft x64 convention) - confirm.
+
+SwizzleBlock4u_amd64 proc public
+
+	push		rsi
+	push		rdi
+	
+	mov			rdi, rcx	; rdi = dst
+	mov			rsi, rdx	; rsi = src
+	mov			rcx, 2	; pass counter
+
+	mov         eax, 0f0f0f0fh
+	movd        xmm7, eax 
+	pshufd      xmm7, xmm7, 0	; xmm7 = 0fh in every byte; presumably the mask used by punpcknbl
+
+	align 16
+@@:
+	; col 0, 2
+
+	movdqu		xmm0, [rsi]
+	movdqu		xmm2, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	movdqu		xmm1, [rsi]
+	movdqu		xmm3, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	pshuflw		xmm1, xmm1, 0b1h	; 0b1h swaps neighbouring words (low half here, high half below)
+	pshuflw		xmm3, xmm3, 0b1h
+	pshufhw		xmm1, xmm1, 0b1h
+	pshufhw		xmm3, xmm3, 0b1h
+
+	punpcknbl	; punpcknbl and punpck are macros defined outside this view
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[rdi+16*0], xmm0
+	movdqa		[rdi+16*1], xmm1
+	movdqa		[rdi+16*2], xmm4
+	movdqa		[rdi+16*3], xmm3
+
+	; col 1, 3
+
+	movdqu		xmm0, [rsi]
+	movdqu		xmm2, [rsi+r8]
+	lea			rsi, [rsi+r8*2]	; full 64-bit destination: writing esi would clear the upper 32 bits of the src pointer
+
+	movdqu		xmm1, [rsi]
+	movdqu		xmm3, [rsi+r8]
+	lea			rsi, [rsi+r8*2]
+
+	pshuflw		xmm0, xmm0, 0b1h	; here the first row pair gets the word swap
+	pshuflw		xmm2, xmm2, 0b1h
+	pshufhw		xmm0, xmm0, 0b1h
+	pshufhw		xmm2, xmm2, 0b1h
+
+	punpcknbl
+	punpck		bw, 0, 2, 4, 6, 1, 3
+	punpck		bw, 0, 2, 1, 3, 4, 6
+	punpck		qdq, 0, 4, 2, 6, 1, 3
+
+	movdqa		[rdi+16*4], xmm0
+	movdqa		[rdi+16*5], xmm1
+	movdqa		[rdi+16*6], xmm4
+	movdqa		[rdi+16*7], xmm3
+
+	add			rdi, 128
+
+	dec			rcx
+	jnz			@B
+
+	pop			rdi
+	pop			rsi
+
+	ret
+
+SwizzleBlock4u_amd64 endp
+
+	end
+	
\ No newline at end of file
diff --git a/gsdx/x86.cpp b/gsdx/x86.cpp
new file mode 100644
index 0000000..677250a
--- /dev/null
+++ b/gsdx/x86.cpp
@@ -0,0 +1,836 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSTables.h"
+#include "x86.h"
+
+// unswizzling
+
+void __fastcall unSwizzleBlock32_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 8 rows of 8 DWORD pixels: each table entry is the DWORD index in src of one output pixel.
+	const DWORD* in = (const DWORD*)src;
+	const DWORD* map = &columnTable32[0][0];
+	for(int row = 0; row < 8; row++, map += 8, dst += dstpitch)
+		for(int x = 0; x < 8; x++) ((DWORD*)dst)[x] = in[map[x]];
+}
+
+void __fastcall unSwizzleBlock16_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 8 rows of 16 WORD pixels: each table entry is the WORD index in src of one output pixel.
+	const WORD* in = (const WORD*)src;
+	const DWORD* map = &columnTable16[0][0];
+	for(int row = 0; row < 8; row++, map += 16, dst += dstpitch)
+		for(int x = 0; x < 16; x++) ((WORD*)dst)[x] = in[map[x]];
+}
+
+void __fastcall unSwizzleBlock8_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 16 rows of 16 bytes: each table entry is the byte offset in src of one output pixel.
+	const DWORD* map = &columnTable8[0][0];
+
+	for(int row = 0; row < 16; row++, map += 16, dst += dstpitch)
+		for(int x = 0; x < 16; x++) dst[x] = src[map[x]];
+}
+
+void __fastcall unSwizzleBlock4_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 16 rows of 32 4-bit pixels; table entries are nibble indices (byte = index/2, odd index = high nibble).
+	const DWORD* map = &columnTable4[0][0];
+	for(int row = 0; row < 16; row++, map += 32, dst += dstpitch)
+	{
+		for(int x = 0; x < 32; x++)
+		{
+			const DWORD from = map[x];
+			const BYTE nibble = (src[from / 2] >> ((from % 2) * 4)) & 0x0f;
+			const int lsb = (x % 2) * 4; // bit position of pixel x inside its output byte
+			dst[x / 2] = (dst[x / 2] & (0xf0 >> lsb)) | (nibble << lsb); // keep the neighbouring nibble
+		}
+	}
+}
+
+void __fastcall unSwizzleBlock8HP_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 8 rows of 8 pixels, keeping only bits 24-31 of each 32-bit source pixel (one output byte per pixel).
+	const DWORD* in = (const DWORD*)src;
+	const DWORD* map = &columnTable32[0][0];
+	for(int row = 0; row < 8; row++, map += 8, dst += dstpitch)
+		for(int x = 0; x < 8; x++) dst[x] = (BYTE)(in[map[x]] >> 24);
+}
+
+void __fastcall unSwizzleBlock4HLP_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 8 rows of 8 pixels, keeping only bits 24-27 of each 32-bit source pixel (one output byte per pixel).
+	const DWORD* in = (const DWORD*)src;
+	const DWORD* map = &columnTable32[0][0];
+	for(int row = 0; row < 8; row++, map += 8, dst += dstpitch)
+		for(int x = 0; x < 8; x++) dst[x] = (BYTE)(in[map[x]] >> 24) & 0xf;
+}
+
+void __fastcall unSwizzleBlock4HHP_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Gather 8 rows of 8 pixels, keeping only bits 28-31 of each 32-bit source pixel (one output byte per pixel).
+	const DWORD* in = (const DWORD*)src;
+	const DWORD* map = &columnTable32[0][0];
+	for(int row = 0; row < 8; row++, map += 8, dst += dstpitch)
+		for(int x = 0; x < 8; x++) dst[x] = (BYTE)(in[map[x]] >> 28);
+}
+
+void __fastcall unSwizzleBlock4P_c(BYTE* src, BYTE* dst, int dstpitch)
+{
+	// Expand 16 rows of 32 4-bit pixels to one byte per pixel (value 0..15).
+	// Table entries are nibble indices: byte = index/2, low nibble when even, high nibble when odd.
+	const DWORD* map = &columnTable4[0][0];
+
+	for(int row = 0; row < 16; row++, map += 32, dst += dstpitch)
+		for(int x = 0; x < 32; x++)
+		{
+			const BYTE packed = src[map[x] / 2];
+			dst[x] = (map[x] % 2) ? (packed >> 4) & 0x0f : packed & 0x0f;
+		}
+}
+
+// swizzling
+
+void __fastcall SwizzleBlock32_c(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask)
+{
+	// Scatter 8 linear rows of 8 DWORD pixels into the swizzled block; table entries are DWORD indices in dst.
+	DWORD* out = (DWORD*)dst;
+	const DWORD* map = &columnTable32[0][0];
+
+	if(WriteMask != 0xffffffff)
+	{
+		// Partial mask: only the bits set in WriteMask come from src, the remaining bits of dst are kept.
+		for(int row = 0; row < 8; row++, map += 8, src += srcpitch)
+			for(int x = 0; x < 8; x++)
+				out[map[x]] = (out[map[x]] & ~WriteMask) | (((const DWORD*)src)[x] & WriteMask);
+		return;
+	}
+	for(int row = 0; row < 8; row++, map += 8, src += srcpitch)
+		for(int x = 0; x < 8; x++) out[map[x]] = ((const DWORD*)src)[x];
+}
+
+void __fastcall SwizzleBlock16_c(BYTE* dst, BYTE* src, int srcpitch)
+{
+	// Scatter 8 linear rows of 16 WORD pixels into the swizzled block; table entries are WORD indices in dst.
+	WORD* out = (WORD*)dst;
+	const DWORD* map = &columnTable16[0][0];
+	for(int row = 0; row < 8; row++, map += 16, src += srcpitch)
+		for(int x = 0; x < 16; x++) out[map[x]] = ((const WORD*)src)[x];
+}
+
+void __fastcall SwizzleBlock8_c(BYTE* dst, BYTE* src, int srcpitch)
+{
+	// Scatter 16 linear rows of 16 bytes into the swizzled block; table entries are byte offsets in dst.
+	const DWORD* map = &columnTable8[0][0];
+
+	for(int row = 0; row < 16; row++, map += 16, src += srcpitch)
+		for(int x = 0; x < 16; x++) dst[map[x]] = src[x];
+}
+
+void __fastcall SwizzleBlock4_c(BYTE* dst, BYTE* src, int srcpitch)
+{
+	// Scatter 16 linear rows of 32 4-bit pixels; table entries are nibble indices in dst (odd index = high nibble).
+	const DWORD* map = &columnTable4[0][0];
+	for(int row = 0; row < 16; row++, map += 32, src += srcpitch)
+	{
+		for(int x = 0; x < 32; x++)
+		{
+			const DWORD to = map[x];
+			const BYTE nibble = (src[x / 2] >> ((x % 2) * 4)) & 0x0f;
+			const DWORD lsb = (to % 2) * 4; // bit position of the target nibble inside its byte
+			dst[to / 2] = (dst[to / 2] & (0xf0 >> lsb)) | (nibble << lsb); // keep the neighbouring nibble
+		}
+	}
+}
+
+// column swizzling (TODO: sse2)
+
+void __fastcall SwizzleColumn32_c(int y, BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask)
+{
+	// Scatter 2 linear rows of 8 DWORD pixels; y selects the pair of table rows starting at ((y/2)&3)*2.
+	DWORD* out = (DWORD*)dst;
+	const DWORD* map = &columnTable32[((y/2)&3)*2][0];
+
+	if(WriteMask != 0xffffffff)
+	{
+		// Partial mask: only the bits set in WriteMask come from src, the remaining bits of dst are kept.
+		for(int row = 0; row < 2; row++, map += 8, src += srcpitch)
+			for(int x = 0; x < 8; x++)
+				out[map[x]] = (out[map[x]] & ~WriteMask) | (((const DWORD*)src)[x] & WriteMask);
+		return;
+	}
+	for(int row = 0; row < 2; row++, map += 8, src += srcpitch)
+		for(int x = 0; x < 8; x++) out[map[x]] = ((const DWORD*)src)[x];
+}
+
+void __fastcall SwizzleColumn16_c(int y, BYTE* dst, BYTE* src, int srcpitch)
+{
+	// Scatter 2 linear rows of 16 WORD pixels; y selects the pair of table rows starting at ((y/2)&3)*2.
+	WORD* out = (WORD*)dst;
+	const DWORD* map = &columnTable16[((y/2)&3)*2][0];
+	for(int row = 0; row < 2; row++, map += 16, src += srcpitch)
+		for(int x = 0; x < 16; x++) out[map[x]] = ((const WORD*)src)[x];
+}
+
+void __fastcall SwizzleColumn8_c(int y, BYTE* dst, BYTE* src, int srcpitch)
+{
+	// Scatter 4 linear rows of 16 bytes; y selects the four table rows starting at ((y/4)&3)*4.
+	const DWORD* map = &columnTable8[((y/4)&3)*4][0];
+
+	for(int row = 0; row < 4; row++, map += 16, src += srcpitch)
+		for(int x = 0; x < 16; x++) dst[map[x]] = src[x];
+}
+
+void __fastcall SwizzleColumn4_c(int y, BYTE* dst, BYTE* src, int srcpitch)
+{
+	// Scatter 4 linear rows of 32 4-bit pixels; table entries are nibble indices in dst (odd index = high nibble).
+	const DWORD* map = &columnTable4[y&(3<<2)][0]; // ((y/4)&3)*4
+	for(int row = 0; row < 4; row++, map += 32, src += srcpitch)
+	{
+		for(int x = 0; x < 32; x++)
+		{
+			const DWORD to = map[x];
+			const BYTE nibble = (src[x / 2] >> ((x % 2) * 4)) & 0x0f;
+			const DWORD lsb = (to % 2) * 4; // bit position of the target nibble inside its byte
+			dst[to / 2] = (dst[to / 2] & (0xf0 >> lsb)) | (nibble << lsb); // keep the neighbouring nibble
+		}
+	}
+}
+
+//
+
+#if defined(_M_AMD64) || _M_IX86_FP >= 2
+
+static __m128i s_zero = _mm_setzero_si128(); // all bits clear
+static __m128i s_bgrm = _mm_set1_epi32(0x00ffffff); // low 24 bits of every 32-bit lane
+static __m128i s_am = _mm_set1_epi32(0x00008000); // bit 15; lanes below this value get TA0, the others TA1, in the 16-bit expanders
+static __m128i s_bm = _mm_set1_epi32(0x00007c00); // bits 10-14 (shifted left by 9 in the 16-bit expanders)
+static __m128i s_gm = _mm_set1_epi32(0x000003e0); // bits 5-9 (shifted left by 6)
+static __m128i s_rm = _mm_set1_epi32(0x0000001f); // bits 0-4 (shifted left by 3)
+
+void __fastcall ExpandBlock24_sse2(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA)
+{
+	const __m128i alpha = _mm_set1_epi32((DWORD)pTEXA->TA0 << 24);
+	// 8 rows of 8 pixels, 4 pixels per step; src rows are 8 DWORDs apart, dst rows dstpitch>>2 DWORDs apart.
+	if(!pTEXA->AEM)
+	{
+		for(int row = 0; row < 8; row++, src += 8, dst += dstpitch>>2)
+		{
+			for(int x = 0; x < 8; x += 4)
+			{
+				// every pixel: low 24 bits of src with TA0 in the top byte
+				const __m128i rgb = _mm_and_si128(_mm_load_si128((__m128i*)&src[x]), s_bgrm);
+
+				_mm_store_si128((__m128i*)&dst[x], _mm_or_si128(rgb, alpha));
+			}
+		}
+	}
+	else
+	{
+		for(int row = 0; row < 8; row++, src += 8, dst += dstpitch>>2)
+		{
+			for(int x = 0; x < 8; x += 4)
+			{
+				// as above, except that pixels whose low 24 bits are all zero keep a zero top byte
+				const __m128i rgb = _mm_and_si128(_mm_load_si128((__m128i*)&src[x]), s_bgrm);
+				const __m128i keep = _mm_andnot_si128(_mm_cmpeq_epi32(rgb, s_zero), alpha);
+
+				_mm_store_si128((__m128i*)&dst[x], _mm_or_si128(rgb, keep));
+			}
+		}
+	}
+}
+
+void __fastcall ExpandBlock16_sse2(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA)
+{
+	const __m128i ta0 = _mm_set1_epi32((DWORD)pTEXA->TA0 << 24);
+	const __m128i ta1 = _mm_set1_epi32((DWORD)pTEXA->TA1 << 24);
+	__m128i alpha, blue, green, red;
+	// Expands 8 rows of 16 16-bit pixels (8 per step): bits 0-4/5-9/10-14 -> bits 3-7/11-15/19-23, top byte TA0 or TA1.
+	if(!pTEXA->AEM)
+	{
+		for(int row = 0; row < 8; row++, src += 16, dst += dstpitch>>2)
+		{
+			for(int x = 0; x < 16; x += 8)
+			{
+				const __m128i px = _mm_load_si128((__m128i*)&src[x]);
+				// zero-extend each pixel to 32 bits
+				__m128i lo = _mm_unpacklo_epi16(px, s_zero);
+				__m128i hi = _mm_unpackhi_epi16(px, s_zero);
+				// all-ones lanes where the pixel is below 0x8000, i.e. bit 15 is clear
+				const __m128i loclr = _mm_cmplt_epi32(lo, s_am);
+				const __m128i hiclr = _mm_cmplt_epi32(hi, s_am);
+
+				// pixels x .. x+3
+
+				red = _mm_slli_epi32(_mm_and_si128(lo, s_rm), 3);
+				green = _mm_slli_epi32(_mm_and_si128(lo, s_gm), 6);
+				blue = _mm_slli_epi32(_mm_and_si128(lo, s_bm), 9);
+				alpha = _mm_or_si128(_mm_andnot_si128(loclr, ta1), _mm_and_si128(loclr, ta0));
+
+				lo = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+				_mm_store_si128((__m128i*)&dst[x], lo);
+
+				// pixels x+4 .. x+7
+
+				red = _mm_slli_epi32(_mm_and_si128(hi, s_rm), 3);
+				green = _mm_slli_epi32(_mm_and_si128(hi, s_gm), 6);
+				blue = _mm_slli_epi32(_mm_and_si128(hi, s_bm), 9);
+				alpha = _mm_or_si128(_mm_andnot_si128(hiclr, ta1), _mm_and_si128(hiclr, ta0));
+
+				hi = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+				_mm_store_si128((__m128i*)&dst[x+4], hi);
+			}
+		}
+	}
+	else
+	{
+		for(int row = 0; row < 8; row++, src += 16, dst += dstpitch>>2)
+		{
+			for(int x = 0; x < 16; x += 8)
+			{
+				const __m128i px = _mm_load_si128((__m128i*)&src[x]);
+				// AEM set: same conversion, but an all-zero pixel gets a zero top byte
+				__m128i lo = _mm_unpacklo_epi16(px, s_zero);
+				__m128i hi = _mm_unpackhi_epi16(px, s_zero);
+				// all-ones lanes where the pixel is below 0x8000, i.e. bit 15 is clear
+				const __m128i loclr = _mm_cmplt_epi32(lo, s_am);
+				const __m128i hiclr = _mm_cmplt_epi32(hi, s_am);
+				// all-ones lanes where the whole 16-bit pixel is zero, widened to 32 bits
+				const __m128i zero = _mm_cmpeq_epi16(px, s_zero);
+				const __m128i lozero = _mm_unpacklo_epi16(zero, zero);
+				const __m128i hizero = _mm_unpackhi_epi16(zero, zero);
+
+				// pixels x .. x+3
+
+				red = _mm_slli_epi32(_mm_and_si128(lo, s_rm), 3);
+				green = _mm_slli_epi32(_mm_and_si128(lo, s_gm), 6);
+				blue = _mm_slli_epi32(_mm_and_si128(lo, s_bm), 9);
+				alpha = _mm_or_si128(_mm_andnot_si128(loclr, ta1), _mm_and_si128(loclr, ta0));
+				alpha = _mm_andnot_si128(lozero, alpha);
+
+				lo = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+				_mm_store_si128((__m128i*)&dst[x], lo);
+
+				// pixels x+4 .. x+7
+
+				red = _mm_slli_epi32(_mm_and_si128(hi, s_rm), 3);
+				green = _mm_slli_epi32(_mm_and_si128(hi, s_gm), 6);
+				blue = _mm_slli_epi32(_mm_and_si128(hi, s_bm), 9);
+				alpha = _mm_or_si128(_mm_andnot_si128(hiclr, ta1), _mm_and_si128(hiclr, ta0));
+				alpha = _mm_andnot_si128(hizero, alpha);
+
+				hi = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+				_mm_store_si128((__m128i*)&dst[x+4], hi);
+			}
+		}
+	}
+}
+
+void __fastcall Expand16_sse2(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA)
+{
+	ASSERT(!(w&7));
+	// Expands w 16-bit pixels (8 per step): bits 0-4/5-9/10-14 -> bits 3-7/11-15/19-23, top byte TA0 or TA1.
+	const __m128i ta0 = _mm_set1_epi32((DWORD)pTEXA->TA0 << 24);
+	const __m128i ta1 = _mm_set1_epi32((DWORD)pTEXA->TA1 << 24);
+	__m128i alpha, blue, green, red;
+
+	if(!pTEXA->AEM)
+	{
+		for(int x = 0; x < w; x += 8)
+		{
+			const __m128i px = _mm_load_si128((__m128i*)&src[x]);
+			// zero-extend each pixel to 32 bits
+			__m128i lo = _mm_unpacklo_epi16(px, s_zero);
+			__m128i hi = _mm_unpackhi_epi16(px, s_zero);
+			// all-ones lanes where the pixel is below 0x8000, i.e. bit 15 is clear
+			const __m128i loclr = _mm_cmplt_epi32(lo, s_am);
+			const __m128i hiclr = _mm_cmplt_epi32(hi, s_am);
+
+			// pixels x .. x+3
+
+			red = _mm_slli_epi32(_mm_and_si128(lo, s_rm), 3);
+			green = _mm_slli_epi32(_mm_and_si128(lo, s_gm), 6);
+			blue = _mm_slli_epi32(_mm_and_si128(lo, s_bm), 9);
+			alpha = _mm_or_si128(_mm_andnot_si128(loclr, ta1), _mm_and_si128(loclr, ta0));
+
+			lo = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+			_mm_store_si128((__m128i*)&dst[x], lo);
+
+			// pixels x+4 .. x+7
+
+			red = _mm_slli_epi32(_mm_and_si128(hi, s_rm), 3);
+			green = _mm_slli_epi32(_mm_and_si128(hi, s_gm), 6);
+			blue = _mm_slli_epi32(_mm_and_si128(hi, s_bm), 9);
+			alpha = _mm_or_si128(_mm_andnot_si128(hiclr, ta1), _mm_and_si128(hiclr, ta0));
+
+			hi = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+			_mm_store_si128((__m128i*)&dst[x+4], hi);
+		}
+	}
+	else
+	{
+		for(int x = 0; x < w; x += 8)
+		{
+			const __m128i px = _mm_load_si128((__m128i*)&src[x]);
+			// AEM set: same conversion, but an all-zero pixel gets a zero top byte
+			__m128i lo = _mm_unpacklo_epi16(px, s_zero);
+			__m128i hi = _mm_unpackhi_epi16(px, s_zero);
+			// all-ones lanes where the pixel is below 0x8000, i.e. bit 15 is clear
+			const __m128i loclr = _mm_cmplt_epi32(lo, s_am);
+			const __m128i hiclr = _mm_cmplt_epi32(hi, s_am);
+			// all-ones lanes where the whole 16-bit pixel is zero, widened to 32 bits
+			const __m128i zero = _mm_cmpeq_epi16(px, s_zero);
+			const __m128i lozero = _mm_unpacklo_epi16(zero, zero);
+			const __m128i hizero = _mm_unpackhi_epi16(zero, zero);
+
+			// pixels x .. x+3
+
+			red = _mm_slli_epi32(_mm_and_si128(lo, s_rm), 3);
+			green = _mm_slli_epi32(_mm_and_si128(lo, s_gm), 6);
+			blue = _mm_slli_epi32(_mm_and_si128(lo, s_bm), 9);
+			alpha = _mm_or_si128(_mm_andnot_si128(loclr, ta1), _mm_and_si128(loclr, ta0));
+			alpha = _mm_andnot_si128(lozero, alpha);
+
+			lo = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+			_mm_store_si128((__m128i*)&dst[x], lo);
+
+			// pixels x+4 .. x+7
+
+			red = _mm_slli_epi32(_mm_and_si128(hi, s_rm), 3);
+			green = _mm_slli_epi32(_mm_and_si128(hi, s_gm), 6);
+			blue = _mm_slli_epi32(_mm_and_si128(hi, s_bm), 9);
+			alpha = _mm_or_si128(_mm_andnot_si128(hiclr, ta1), _mm_and_si128(hiclr, ta0));
+			alpha = _mm_andnot_si128(hizero, alpha);
+
+			hi = _mm_or_si128(_mm_or_si128(red, green), _mm_or_si128(blue, alpha));
+
+			_mm_store_si128((__m128i*)&dst[x+4], hi);
+		}
+	}
+}
+
+#endif
+
+void __fastcall ExpandBlock24_c(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA)
+{
+	// Expands an 8x8 block: the low 24 bits are copied and the top byte is set to TA0; with AEM set,
+	// a pixel whose low 24 bits are zero gets a zero top byte instead.
+	const DWORD alpha = (DWORD)pTEXA->TA0 << 24;
+	const DWORD blackAlpha = pTEXA->AEM ? 0 : alpha;
+
+	for(int row = 0; row < 8; row++, src += 8, dst += dstpitch>>2)
+	{
+		for(int x = 0; x < 8; x++)
+		{
+			const DWORD rgb = src[x] & 0xffffff;
+
+			dst[x] = rgb | (rgb ? alpha : blackAlpha);
+		}
+	}
+}
+
+void __fastcall ExpandBlock16_c(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA)
+{
+	// Expands 8 rows of 16 16-bit pixels to 32 bits: bits 0-4, 5-9, 10-14 move to bits 3-7, 11-15, 19-23.
+	// Top byte: TA1 when bit 15 is set, otherwise TA0 - except that an all-zero pixel gets 0 when AEM is set.
+	const DWORD ta0 = (DWORD)pTEXA->TA0 << 24;
+	const DWORD ta1 = (DWORD)pTEXA->TA1 << 24;
+	const DWORD zeroPixelAlpha = pTEXA->AEM ? 0 : ta0;
+
+	for(int row = 0; row < 8; row++, src += 16, dst += dstpitch>>2)
+	{
+		for(int x = 0; x < 16; x++)
+		{
+			const DWORD px = src[x];
+			const DWORD alpha = (px & 0x8000) ? ta1 : (px ? ta0 : zeroPixelAlpha);
+
+			dst[x] = alpha
+				| ((px & 0x7c00) << 9) | ((px & 0x03e0) << 6) | ((px & 0x001f) << 3);
+		}
+	}
+}
+
+void __fastcall Expand16_c(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA)
+{
+	// Expands w 16-bit pixels to 32 bits: bits 0-4, 5-9, 10-14 move to bits 3-7, 11-15, 19-23.
+	// Top byte: TA1 when bit 15 is set, otherwise TA0 - except that an all-zero pixel gets 0 when AEM is set.
+	const DWORD ta0 = (DWORD)pTEXA->TA0 << 24;
+	const DWORD ta1 = (DWORD)pTEXA->TA1 << 24;
+	const DWORD zeroPixelAlpha = pTEXA->AEM ? 0 : ta0;
+
+	for(int x = 0; x < w; x++)
+	{
+		const DWORD px = src[x];
+		// a pixel with bit 15 clear is either non-zero (TA0) or entirely zero (AEM decides)
+		const DWORD alpha = (px & 0x8000) ? ta1 : (px ? ta0 : zeroPixelAlpha);
+
+		dst[x] = alpha
+			| ((px & 0x7c00) << 9) | ((px & 0x03e0) << 6) | ((px & 0x001f) << 3);
+	}
+}
+
+//
+
+#if defined(_M_AMD64) || _M_IX86_FP >= 2
+
+static __m128 s_uvmin = _mm_set1_ps(+1e10); // seed for the running minimum (all four lanes)
+static __m128 s_uvmax = _mm_set1_ps(-1e10); // seed for the running maximum (all four lanes)
+// Computes the min/max of the u and v members over nVertices vertices and writes them to *uv.
+void __fastcall UVMinMax_sse2(int nVertices, vertex_t* pVertices, uvmm_t* uv)
+{
+	__m128 uvmin = s_uvmin;
+	__m128 uvmax = s_uvmax;
+	// skip the first 16 bytes of vertex 0; assuming vertex_t is 32 bytes (xyzw[4], color[2], u, v), p[i*2] covers color[2], u, v of vertex i
+	__m128* p = (__m128*)pVertices + 1;
+	// NOTE(review): p[] is used directly as an SSE operand - presumably pVertices must be 16-byte aligned; confirm against callers
+	int i = 0;
+	// lower the bound so the unrolled loop only runs while six whole vertices remain
+	nVertices -= 5;
+
+	for(; i < nVertices; i += 6) // 6 regs for loading, 2 regs for min/max
+	{
+		uvmin = _mm_min_ps(uvmin, p[(i+0)*2]);
+		uvmax = _mm_max_ps(uvmax, p[(i+0)*2]);
+		uvmin = _mm_min_ps(uvmin, p[(i+1)*2]);
+		uvmax = _mm_max_ps(uvmax, p[(i+1)*2]);
+		uvmin = _mm_min_ps(uvmin, p[(i+2)*2]);
+		uvmax = _mm_max_ps(uvmax, p[(i+2)*2]);
+		uvmin = _mm_min_ps(uvmin, p[(i+3)*2]);
+		uvmax = _mm_max_ps(uvmax, p[(i+3)*2]);
+		uvmin = _mm_min_ps(uvmin, p[(i+4)*2]);
+		uvmax = _mm_max_ps(uvmax, p[(i+4)*2]);
+		uvmin = _mm_min_ps(uvmin, p[(i+5)*2]);
+		uvmax = _mm_max_ps(uvmax, p[(i+5)*2]);
+	}
+	// restore the real count; the loop below picks up the remaining vertices one at a time
+	nVertices += 5;
+
+	for(; i < nVertices; i++)
+	{
+		uvmin = _mm_min_ps(uvmin, p[i*2]);
+		uvmax = _mm_max_ps(uvmax, p[i*2]);
+	}
+	// only the upper two lanes are stored: (umin, vmin) first, then (umax, vmax); the lower lanes are discarded
+	_mm_storeh_pi((__m64*)uv, uvmin);
+	_mm_storeh_pi((__m64*)uv + 1, uvmax);
+}
+
+#endif
+
+void __fastcall UVMinMax_c(int nVertices, vertex_t* pVertices, uvmm_t* uv)
+{
+	// Scalar u/v bounding box; when nVertices <= 0 the +/-1e10 seed values are what gets stored.
+	float umin = +1e10f, vmin = +1e10f, umax = -1e10f, vmax = -1e10f;
+	for(int n = 0; n < nVertices; n++)
+	{
+		const float u = pVertices[n].u, v = pVertices[n].v;
+		if(u > umax) umax = u;
+		if(u < umin) umin = u;
+		if(v > vmax) vmax = v;
+		if(v < vmin) vmin = v;
+	}
+	const uvmm_t bounds = {umin, vmin, umax, vmax};
+	*uv = bounds;
+}
+
+#if defined(_M_AMD64) || _M_IX86_FP >= 2
+
+static __m128i s_clut[64]; // scratch buffer used by the 32-bit CLUT writers in this file
+// SSE2 version of WriteCLUT_T16_I8_CSM1_c: reorders 256 16-bit entries from vm into clut.
+void __fastcall WriteCLUT_T16_I8_CSM1_sse2(WORD* vm, WORD* clut)
+{
+	__m128i* src = (__m128i*)vm;
+	__m128i* dst = (__m128i*)clut;
+	// each pass handles 32 entries (4 registers of 8 WORDs); aligned loads and stores are used on both pointers
+	for(int i = 0; i < 32; i += 4)
+	{
+		__m128i r0 = _mm_load_si128(&src[i+0]);
+		__m128i r1 = _mm_load_si128(&src[i+1]);
+		__m128i r2 = _mm_load_si128(&src[i+2]);
+		__m128i r3 = _mm_load_si128(&src[i+3]);
+		// three interleave stages; after the last one r4 = entries 0,2,8,10,.. r6 = 4,6,12,14,.. r5 = 1,3,9,11,.. r7 = 5,7,13,15,..
+		__m128i r4 = _mm_unpacklo_epi16(r0, r1);
+		__m128i r5 = _mm_unpackhi_epi16(r0, r1);
+		__m128i r6 = _mm_unpacklo_epi16(r2, r3);
+		__m128i r7 = _mm_unpackhi_epi16(r2, r3);
+
+		r0 = _mm_unpacklo_epi32(r4, r6);
+		r1 = _mm_unpackhi_epi32(r4, r6);
+		r2 = _mm_unpacklo_epi32(r5, r7);
+		r3 = _mm_unpackhi_epi32(r5, r7);
+
+		r4 = _mm_unpacklo_epi16(r0, r1);
+		r5 = _mm_unpackhi_epi16(r0, r1);
+		r6 = _mm_unpacklo_epi16(r2, r3);
+		r7 = _mm_unpackhi_epi16(r2, r3);
+		// note the r4, r6, r5, r7 store order: it reproduces the row order of the map used by the _c version
+		_mm_store_si128(&dst[i+0], r4);
+		_mm_store_si128(&dst[i+1], r6);
+		_mm_store_si128(&dst[i+2], r5);
+		_mm_store_si128(&dst[i+3], r7);
+	}
+}
+
+void __fastcall WriteCLUT_T32_I8_CSM1_sse2(DWORD* vm, WORD* clut)
+{
+	__m128i* src = (__m128i*)vm;
+	__m128i* dst = s_clut;
+	// Pass 1: reorder the 256 32-bit entries (64 registers) from vm into s_clut, 32 registers per outer iteration.
+	for(int j = 0; j < 64; j += 32, src += 32, dst += 32)
+	{
+		for(int i = 0; i < 16; i += 4)
+		{
+			__m128i r0 = _mm_load_si128(&src[i+0]);
+			__m128i r1 = _mm_load_si128(&src[i+1]);
+			__m128i r2 = _mm_load_si128(&src[i+2]);
+			__m128i r3 = _mm_load_si128(&src[i+3]);
+			// regroup 64-bit halves: the low halves of (r0,r1) and (r2,r3) first, then the high halves
+			_mm_store_si128(&dst[i*2+0], _mm_unpacklo_epi64(r0, r1));
+			_mm_store_si128(&dst[i*2+1], _mm_unpacklo_epi64(r2, r3));
+			_mm_store_si128(&dst[i*2+2], _mm_unpackhi_epi64(r0, r1));
+			_mm_store_si128(&dst[i*2+3], _mm_unpackhi_epi64(r2, r3));
+			// the same regrouping for the four registers located 16 registers further on in the source
+			__m128i r4 = _mm_load_si128(&src[i+0+16]);
+			__m128i r5 = _mm_load_si128(&src[i+1+16]);
+			__m128i r6 = _mm_load_si128(&src[i+2+16]);
+			__m128i r7 = _mm_load_si128(&src[i+3+16]);
+
+			_mm_store_si128(&dst[i*2+4], _mm_unpacklo_epi64(r4, r5));
+			_mm_store_si128(&dst[i*2+5], _mm_unpacklo_epi64(r6, r7));
+			_mm_store_si128(&dst[i*2+6], _mm_unpackhi_epi64(r4, r5));
+			_mm_store_si128(&dst[i*2+7], _mm_unpackhi_epi64(r6, r7));
+		}
+	}
+	// Pass 2: split every reordered entry into its 16-bit halves; low words go to clut[0..255], high words to clut[256..511].
+	for(int i = 0; i < 32; i++)
+	{
+		__m128i r0 = s_clut[i*2];
+		__m128i r1 = s_clut[i*2+1];
+		__m128i r2 = _mm_unpacklo_epi16(r0, r1);
+		__m128i r3 = _mm_unpackhi_epi16(r0, r1);
+		r0 = _mm_unpacklo_epi16(r2, r3);
+		r1 = _mm_unpackhi_epi16(r2, r3);
+		r2 = _mm_unpacklo_epi16(r0, r1); // the eight low words, in entry order
+		r3 = _mm_unpackhi_epi16(r0, r1); // the eight high words, in entry order
+		_mm_store_si128(&((__m128i*)clut)[i], r2);
+		_mm_store_si128(&((__m128i*)clut)[i+32], r3); // 32 registers = 256 WORDs further on
+	}
+}
+
+void __fastcall WriteCLUT_T16_I4_CSM1_sse2(WORD* vm, WORD* clut)
+{
+	// TODO: no SSE2 implementation yet (probably not worth it, the _c version is going to be just as fast); forward to it
+	WriteCLUT_T16_I4_CSM1_c(vm, clut);
+}
+
+void __fastcall WriteCLUT_T32_I4_CSM1_sse2(DWORD* vm, WORD* clut)
+{
+	__m128i* src = (__m128i*)vm;
+	__m128i* dst = s_clut;
+	// 16-entry version of the 32-bit writer: reorder four registers into s_clut, then split them into low and high words.
+	__m128i r0 = _mm_load_si128(&src[0]);
+	__m128i r1 = _mm_load_si128(&src[1]);
+	__m128i r2 = _mm_load_si128(&src[2]);
+	__m128i r3 = _mm_load_si128(&src[3]);
+
+	_mm_store_si128(&dst[0], _mm_unpacklo_epi64(r0, r1));
+	_mm_store_si128(&dst[1], _mm_unpacklo_epi64(r2, r3));
+	_mm_store_si128(&dst[2], _mm_unpackhi_epi64(r0, r1));
+	_mm_store_si128(&dst[3], _mm_unpackhi_epi64(r2, r3));
+	// NOTE: the r0..r3 declared inside the loop shadow the outer ones; the outer values are not used again
+	for(int i = 0; i < 2; i++)
+	{
+		__m128i r0 = s_clut[i*2];
+		__m128i r1 = s_clut[i*2+1];
+		__m128i r2 = _mm_unpacklo_epi16(r0, r1);
+		__m128i r3 = _mm_unpackhi_epi16(r0, r1);
+		r0 = _mm_unpacklo_epi16(r2, r3);
+		r1 = _mm_unpackhi_epi16(r2, r3);
+		r2 = _mm_unpacklo_epi16(r0, r1); // the eight low words, in entry order
+		r3 = _mm_unpackhi_epi16(r0, r1); // the eight high words, in entry order
+		_mm_store_si128(&((__m128i*)clut)[i], r2);
+		_mm_store_si128(&((__m128i*)clut)[i+32], r3); // 32 registers = 256 WORDs further on
+	}
+}
+
+#endif
+
+void __fastcall WriteCLUT_T16_I8_CSM1_c(WORD* vm, WORD* clut)
+{
+	// Source offset of each of the 32 entries that make up one group of the palette.
+	static const DWORD order[32] = 
+	{
+		0, 2, 8, 10, 16, 18, 24, 26,
+		4, 6, 12, 14, 20, 22, 28, 30,
+		1, 3, 9, 11, 17, 19, 25, 27,
+		5, 7, 13, 15, 21, 23, 29, 31
+	};
+
+	// 8 groups of 32 entries = 256 palette entries; the same order applies inside every group
+	for(int n = 0; n < 256; n++)
+	{
+		const int group = n & ~31;
+		clut[n] = vm[group + order[n & 31]];
+	}
+}
+
+void __fastcall WriteCLUT_T32_I8_CSM1_c(DWORD* vm, WORD* clut)
+{
+	// Source offset of each of the 128 entries that make up one half of the palette.
+	static const DWORD order[128] = 
+	{
+		0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15, 
+		64, 65, 68, 69, 72, 73, 76, 77, 66, 67, 70, 71, 74, 75, 78, 79, 
+		16, 17, 20, 21, 24, 25, 28, 29, 18, 19, 22, 23, 26, 27, 30, 31, 
+		80, 81, 84, 85, 88, 89, 92, 93, 82, 83, 86, 87, 90, 91, 94, 95, 
+		32, 33, 36, 37, 40, 41, 44, 45, 34, 35, 38, 39, 42, 43, 46, 47, 
+		96, 97, 100, 101, 104, 105, 108, 109, 98, 99, 102, 103, 106, 107, 110, 111, 
+		48, 49, 52, 53, 56, 57, 60, 61, 50, 51, 54, 55, 58, 59, 62, 63, 
+		112, 113, 116, 117, 120, 121, 124, 125, 114, 115, 118, 119, 122, 123, 126, 127
+	};
+
+	// Low words go to clut[0..255], high words to clut[256..511].
+	for(int n = 0; n < 256; n++)
+	{
+		const DWORD texel = vm[(n & ~127) + order[n & 127]];
+
+		clut[n] = (WORD)(texel & 0xffff);
+		clut[n + 256] = (WORD)(texel >> 16);
+	}
+}
+
+void __fastcall WriteCLUT_T16_I4_CSM1_c(WORD* vm, WORD* clut)
+{
+	static const DWORD order[16] = // where in vm each of the 16 palette entries is read from
+	{
+		0, 2, 8, 10, 16, 18, 24, 26,
+		4, 6, 12, 14, 20, 22, 28, 30
+	};
+
+	for(const DWORD* p = order; p != order + 16; p++)
+	{
+		clut[p - order] = vm[*p];
+	}
+}
+
+void __fastcall WriteCLUT_T32_I4_CSM1_c(DWORD* vm, WORD* clut)
+{
+	static const DWORD order[16] = // where in vm each of the 16 palette entries is read from
+	{
+		0, 1, 4, 5, 8, 9, 12, 13,
+		2, 3, 6, 7, 10, 11, 14, 15
+	};
+
+	for(const DWORD* p = order; p != order + 16; p++)
+	{
+		const DWORD texel = vm[*p];
+		clut[p - order] = (WORD)(texel & 0xffff);
+		clut[(p - order) + 256] = (WORD)(texel >> 16); // high half is kept 256 WORDs after the low half
+	}
+}
+
+//
+
+#if defined(_M_AMD64) || _M_IX86_FP >= 2
+
+extern "C" void __fastcall ReadCLUT32_T32_I8_sse2(WORD* src, DWORD* dst) // rebuilds 256 32-bit entries from src into dst
+{
+	for(int i = 0; i < 256; i += 16) // 16 entries are handled per call to the I4 routine
+	{
+		ReadCLUT32_T32_I4_sse2(&src[i], &dst[i]); // going to be inlined nicely
+	}
+}
+
+extern "C" void __fastcall ReadCLUT32_T32_I4_sse2(WORD* src, DWORD* dst) // rebuilds 16 32-bit entries from src into dst
+{
+	__m128i r0 = ((__m128i*)src)[0]; // low words of entries 0..7
+	__m128i r1 = ((__m128i*)src)[1]; // low words of entries 8..15
+	__m128i r2 = ((__m128i*)src)[0+32]; // 32 registers (256 WORDs) further on: high words of entries 0..7
+	__m128i r3 = ((__m128i*)src)[1+32]; // high words of entries 8..15
+	_mm_store_si128(&((__m128i*)dst)[0], _mm_unpacklo_epi16(r0, r2)); // interleave low/high words into DWORDs, entries 0..3
+	_mm_store_si128(&((__m128i*)dst)[1], _mm_unpackhi_epi16(r0, r2)); // entries 4..7
+	_mm_store_si128(&((__m128i*)dst)[2], _mm_unpacklo_epi16(r1, r3)); // entries 8..11
+	_mm_store_si128(&((__m128i*)dst)[3], _mm_unpackhi_epi16(r1, r3)); // entries 12..15
+}
+
+extern "C" void __fastcall ReadCLUT32_T16_I8_sse2(WORD* src, DWORD* dst) // widens 256 16-bit entries from src into dst
+{
+	for(int i = 0; i < 256; i += 16) // 16 entries are handled per call to the I4 routine
+	{
+		ReadCLUT32_T16_I4_sse2(&src[i], &dst[i]);
+	}
+}
+
+extern "C" void __fastcall ReadCLUT32_T16_I4_sse2(WORD* src, DWORD* dst) // widens 16 16-bit entries from src into dst
+{
+	__m128i r0 = ((__m128i*)src)[0]; // entries 0..7
+	__m128i r1 = ((__m128i*)src)[1]; // entries 8..15
+	_mm_store_si128(&((__m128i*)dst)[0], _mm_unpacklo_epi16(r0, s_zero)); // each WORD is paired with a WORD of s_zero (defined outside this view; presumably all zero bits)
+	_mm_store_si128(&((__m128i*)dst)[1], _mm_unpackhi_epi16(r0, s_zero));
+	_mm_store_si128(&((__m128i*)dst)[2], _mm_unpacklo_epi16(r1, s_zero));
+	_mm_store_si128(&((__m128i*)dst)[3], _mm_unpackhi_epi16(r1, s_zero));
+}
+
+#endif
+
+void __fastcall ReadCLUT32_T32_I8_c(WORD* src, DWORD* dst)
+{
+	// entry n is rebuilt from its low word at src[n] and its high word at src[n+256]
+	const WORD* lo = src;
+	const WORD* hi = src + 256;
+	for(int n = 0; n < 256; n++) dst[n] = ((DWORD)hi[n] << 16) | lo[n];
+}
+
+void __fastcall ReadCLUT32_T32_I4_c(WORD* src, DWORD* dst)
+{
+	// 16-entry version: low word at src[n], high word at src[n+256]
+	const WORD* lo = src;
+	const WORD* hi = src + 256;
+	for(int n = 0; n < 16; n++) dst[n] = ((DWORD)hi[n] << 16) | lo[n];
+}
+
+void __fastcall ReadCLUT32_T16_I8_c(WORD* src, DWORD* dst)
+{
+	// zero-extends all 256 16-bit palette entries to 32 bits
+	DWORD* const end = dst + 256;
+
+	while(dst != end) *dst++ = (DWORD)*src++;
+}
+
+void __fastcall ReadCLUT32_T16_I4_c(WORD* src, DWORD* dst)
+{
+	// zero-extends the 16 16-bit palette entries to 32 bits
+	DWORD* const end = dst + 16;
+
+	while(dst != end) *dst++ = (DWORD)*src++;
+}
+
+//
\ No newline at end of file
diff --git a/gsdx/x86.h b/gsdx/x86.h
new file mode 100644
index 0000000..9c24ea6
--- /dev/null
+++ b/gsdx/x86.h
@@ -0,0 +1,239 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GS.h"
+
+extern "C" void __fastcall memsetd(void* dst, unsigned int c, size_t len);
+// Block swizzlers/unswizzlers in _amd64, _sse2 and _c variants; the macros at the end of this header select one set per build.
+extern "C" void unSwizzleBlock32_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock16_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock8_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock4_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock8HP_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock4HLP_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock4HHP_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void unSwizzleBlock4P_amd64(BYTE* src, BYTE* dst, __int64 dstpitch);
+extern "C" void SwizzleBlock32_amd64(BYTE* dst, BYTE* src, __int64 srcpitch, DWORD WriteMask = 0xffffffff);
+extern "C" void SwizzleBlock16_amd64(BYTE* dst, BYTE* src, __int64 srcpitch);
+extern "C" void SwizzleBlock8_amd64(BYTE* dst, BYTE* src, __int64 srcpitch);
+extern "C" void SwizzleBlock4_amd64(BYTE* dst, BYTE* src, __int64 srcpitch);
+extern "C" void SwizzleBlock32u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch, DWORD WriteMask = 0xffffffff);
+extern "C" void SwizzleBlock16u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch);
+extern "C" void SwizzleBlock8u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch);
+extern "C" void SwizzleBlock4u_amd64(BYTE* dst, BYTE* src, __int64 srcpitch);
+extern "C" void __fastcall unSwizzleBlock32_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock16_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock8_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock4_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock8HP_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock4HLP_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock4HHP_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall unSwizzleBlock4P_sse2(BYTE* src, BYTE* dst, int dstpitch);
+extern "C" void __fastcall SwizzleBlock32_sse2(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff);
+extern "C" void __fastcall SwizzleBlock16_sse2(BYTE* dst, BYTE* src, int srcpitch);
+extern "C" void __fastcall SwizzleBlock8_sse2(BYTE* dst, BYTE* src, int srcpitch);
+extern "C" void __fastcall SwizzleBlock4_sse2(BYTE* dst, BYTE* src, int srcpitch);
+extern "C" void __fastcall SwizzleBlock32u_sse2(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff);
+extern "C" void __fastcall SwizzleBlock16u_sse2(BYTE* dst, BYTE* src, int srcpitch);
+extern "C" void __fastcall SwizzleBlock8u_sse2(BYTE* dst, BYTE* src, int srcpitch);
+extern "C" void __fastcall SwizzleBlock4u_sse2(BYTE* dst, BYTE* src, int srcpitch);
+extern void __fastcall unSwizzleBlock32_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock16_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock8_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock4_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock8HP_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock4HLP_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock4HHP_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall unSwizzleBlock4P_c(BYTE* src, BYTE* dst, int dstpitch);
+extern void __fastcall SwizzleBlock32_c(BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff);
+extern void __fastcall SwizzleBlock16_c(BYTE* dst, BYTE* src, int srcpitch);
+extern void __fastcall SwizzleBlock8_c(BYTE* dst, BYTE* src, int srcpitch);
+extern void __fastcall SwizzleBlock4_c(BYTE* dst, BYTE* src, int srcpitch);
+// Column swizzlers; only _c variants are declared in this header.
+extern void __fastcall SwizzleColumn32_c(int y, BYTE* dst, BYTE* src, int srcpitch, DWORD WriteMask = 0xffffffff);
+extern void __fastcall SwizzleColumn16_c(int y, BYTE* dst, BYTE* src, int srcpitch);
+extern void __fastcall SwizzleColumn8_c(int y, BYTE* dst, BYTE* src, int srcpitch);
+extern void __fastcall SwizzleColumn4_c(int y, BYTE* dst, BYTE* src, int srcpitch);
+// Expansion of 24-bit and 16-bit texels to 32 bits, with the alpha taken from the TEXA register.
+extern void __fastcall ExpandBlock24_sse2(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA);
+extern void __fastcall ExpandBlock16_sse2(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA);
+extern void __fastcall Expand16_sse2(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA);
+extern void __fastcall ExpandBlock24_c(DWORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA);
+extern void __fastcall ExpandBlock16_c(WORD* src, DWORD* dst, int dstpitch, GIFRegTEXA* pTEXA);
+extern void __fastcall Expand16_c(WORD* src, DWORD* dst, int w, GIFRegTEXA* pTEXA);
+
+extern "C" void SaturateColor_amd64(int* c);
+extern "C" void __fastcall SaturateColor_sse2(int* c);
+extern "C" void __fastcall SaturateColor_asm(int* c);
+// uvmm_t receives the result of UVMinMax; vertex_t is the vertex layout it scans (u and v are the last two floats).
+struct uvmm_t {float umin, vmin, umax, vmax;};
+struct vertex_t {float xyzw[4]; DWORD color[2]; float u, v;};
+extern "C" void __fastcall UVMinMax_sse2(int nVertices, vertex_t* pVertices, uvmm_t* uv);
+extern "C" void __fastcall UVMinMax_c(int nVertices, vertex_t* pVertices, uvmm_t* uv);
+// CLUT writers: T16/T32 take 16-bit/32-bit source entries, I8/I4 handle 256/16 entries.
+extern "C" void __fastcall WriteCLUT_T16_I8_CSM1_sse2(WORD* vm, WORD* clut);
+extern "C" void __fastcall WriteCLUT_T32_I8_CSM1_sse2(DWORD* vm, WORD* clut);
+extern "C" void __fastcall WriteCLUT_T16_I4_CSM1_sse2(WORD* vm, WORD* clut);
+extern "C" void __fastcall WriteCLUT_T32_I4_CSM1_sse2(DWORD* vm, WORD* clut);
+extern void __fastcall WriteCLUT_T16_I8_CSM1_c(WORD* vm, WORD* clut);
+extern void __fastcall WriteCLUT_T32_I8_CSM1_c(DWORD* vm, WORD* clut);
+extern void __fastcall WriteCLUT_T16_I4_CSM1_c(WORD* vm, WORD* clut);
+extern void __fastcall WriteCLUT_T32_I4_CSM1_c(DWORD* vm, WORD* clut);
+// CLUT readers: copy palette entries out of the WORD-based CLUT storage as 32-bit values.
+extern "C" void __fastcall ReadCLUT32_T32_I8_sse2(WORD* src, DWORD* dst);
+extern "C" void __fastcall ReadCLUT32_T32_I4_sse2(WORD* src, DWORD* dst);
+extern "C" void __fastcall ReadCLUT32_T16_I8_sse2(WORD* src, DWORD* dst);
+extern "C" void __fastcall ReadCLUT32_T16_I4_sse2(WORD* src, DWORD* dst);
+extern void __fastcall ReadCLUT32_T32_I8_c(WORD* src, DWORD* dst);
+extern void __fastcall ReadCLUT32_T32_I4_c(WORD* src, DWORD* dst);
+extern void __fastcall ReadCLUT32_T16_I8_c(WORD* src, DWORD* dst);
+extern void __fastcall ReadCLUT32_T16_I4_c(WORD* src, DWORD* dst);
+
+#ifdef _M_AMD64
+// 64-bit build: _amd64 swizzlers, C column swizzlers, _sse2 versions of everything else.
+#define SaturateColor SaturateColor_amd64
+
+#define unSwizzleBlock32 unSwizzleBlock32_amd64
+#define unSwizzleBlock16 unSwizzleBlock16_amd64
+#define unSwizzleBlock8 unSwizzleBlock8_amd64
+#define unSwizzleBlock4 unSwizzleBlock4_amd64
+#define unSwizzleBlock8HP unSwizzleBlock8HP_amd64
+#define unSwizzleBlock4HLP unSwizzleBlock4HLP_amd64
+#define unSwizzleBlock4HHP unSwizzleBlock4HHP_amd64
+#define unSwizzleBlock4P unSwizzleBlock4P_amd64
+#define SwizzleBlock32 SwizzleBlock32_amd64
+#define SwizzleBlock16 SwizzleBlock16_amd64
+#define SwizzleBlock8 SwizzleBlock8_amd64
+#define SwizzleBlock4 SwizzleBlock4_amd64
+#define SwizzleBlock32u SwizzleBlock32u_amd64
+#define SwizzleBlock16u SwizzleBlock16u_amd64
+#define SwizzleBlock8u SwizzleBlock8u_amd64
+#define SwizzleBlock4u SwizzleBlock4u_amd64
+
+#define SwizzleColumn32 SwizzleColumn32_c
+#define SwizzleColumn16 SwizzleColumn16_c
+#define SwizzleColumn8 SwizzleColumn8_c
+#define SwizzleColumn4 SwizzleColumn4_c
+
+#define ExpandBlock24 ExpandBlock24_sse2
+#define ExpandBlock16 ExpandBlock16_sse2
+#define Expand16 Expand16_sse2
+
+#define UVMinMax UVMinMax_sse2
+
+#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_sse2
+#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_sse2
+#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_sse2
+#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_sse2
+
+#define ReadCLUT32_T32_I8 ReadCLUT32_T32_I8_sse2
+#define ReadCLUT32_T32_I4 ReadCLUT32_T32_I4_sse2
+#define ReadCLUT32_T16_I8 ReadCLUT32_T16_I8_sse2
+#define ReadCLUT32_T16_I4 ReadCLUT32_T16_I4_sse2
+
+#elif _M_IX86_FP >= 2
+// 32-bit build with _M_IX86_FP >= 2 (SSE2 code generation enabled): _sse2 versions, C column swizzlers.
+#define SaturateColor SaturateColor_sse2
+
+#define unSwizzleBlock32 unSwizzleBlock32_sse2
+#define unSwizzleBlock16 unSwizzleBlock16_sse2
+#define unSwizzleBlock8 unSwizzleBlock8_sse2
+#define unSwizzleBlock4 unSwizzleBlock4_sse2
+#define unSwizzleBlock8HP unSwizzleBlock8HP_sse2
+#define unSwizzleBlock4HLP unSwizzleBlock4HLP_sse2
+#define unSwizzleBlock4HHP unSwizzleBlock4HHP_sse2
+#define unSwizzleBlock4P unSwizzleBlock4P_sse2
+#define SwizzleBlock32 SwizzleBlock32_sse2
+#define SwizzleBlock16 SwizzleBlock16_sse2
+#define SwizzleBlock8 SwizzleBlock8_sse2
+#define SwizzleBlock4 SwizzleBlock4_sse2
+#define SwizzleBlock32u SwizzleBlock32u_sse2
+#define SwizzleBlock16u SwizzleBlock16u_sse2
+#define SwizzleBlock8u SwizzleBlock8u_sse2
+#define SwizzleBlock4u SwizzleBlock4u_sse2
+
+#define SwizzleColumn32 SwizzleColumn32_c
+#define SwizzleColumn16 SwizzleColumn16_c
+#define SwizzleColumn8 SwizzleColumn8_c
+#define SwizzleColumn4 SwizzleColumn4_c
+#define SwizzleColumn4h SwizzleColumn4h_c // NOTE(review): only defined in this branch, and SwizzleColumn4h_c is not declared in this header - verify it is still needed
+
+#define ExpandBlock24 ExpandBlock24_sse2
+#define ExpandBlock16 ExpandBlock16_sse2
+#define Expand16 Expand16_sse2
+
+#define UVMinMax UVMinMax_sse2
+
+#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_sse2
+#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_sse2
+#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_sse2
+#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_sse2
+
+#define ReadCLUT32_T32_I8 ReadCLUT32_T32_I8_sse2
+#define ReadCLUT32_T32_I4 ReadCLUT32_T32_I4_sse2
+#define ReadCLUT32_T16_I8 ReadCLUT32_T16_I8_sse2
+#define ReadCLUT32_T16_I4 ReadCLUT32_T16_I4_sse2
+
+#else
+// Neither of the above: plain C versions of everything except SaturateColor, which maps to the _asm variant.
+#define SaturateColor SaturateColor_asm
+
+#define unSwizzleBlock32 unSwizzleBlock32_c
+#define unSwizzleBlock16 unSwizzleBlock16_c
+#define unSwizzleBlock8 unSwizzleBlock8_c
+#define unSwizzleBlock4 unSwizzleBlock4_c
+#define unSwizzleBlock8HP unSwizzleBlock8HP_c
+#define unSwizzleBlock4HLP unSwizzleBlock4HLP_c
+#define unSwizzleBlock4HHP unSwizzleBlock4HHP_c
+#define unSwizzleBlock4P unSwizzleBlock4P_c
+#define SwizzleBlock32 SwizzleBlock32_c
+#define SwizzleBlock16 SwizzleBlock16_c
+#define SwizzleBlock8 SwizzleBlock8_c
+#define SwizzleBlock4 SwizzleBlock4_c
+#define SwizzleBlock32u SwizzleBlock32_c // the "u" names have no separate C version and map to the regular _c functions
+#define SwizzleBlock16u SwizzleBlock16_c
+#define SwizzleBlock8u SwizzleBlock8_c
+#define SwizzleBlock4u SwizzleBlock4_c
+
+#define SwizzleColumn32 SwizzleColumn32_c
+#define SwizzleColumn16 SwizzleColumn16_c
+#define SwizzleColumn8 SwizzleColumn8_c
+#define SwizzleColumn4 SwizzleColumn4_c
+
+#define ExpandBlock24 ExpandBlock24_c
+#define ExpandBlock16 ExpandBlock16_c
+#define Expand16 Expand16_c
+
+#define UVMinMax UVMinMax_c
+
+#define WriteCLUT_T16_I8_CSM1 WriteCLUT_T16_I8_CSM1_c
+#define WriteCLUT_T32_I8_CSM1 WriteCLUT_T32_I8_CSM1_c
+#define WriteCLUT_T16_I4_CSM1 WriteCLUT_T16_I4_CSM1_c
+#define WriteCLUT_T32_I4_CSM1 WriteCLUT_T32_I4_CSM1_c
+
+#define ReadCLUT32_T32_I8 ReadCLUT32_T32_I8_c
+#define ReadCLUT32_T32_I4 ReadCLUT32_T32_I4_c
+#define ReadCLUT32_T16_I8 ReadCLUT32_T16_I8_c
+#define ReadCLUT32_T16_I4 ReadCLUT32_T16_I4_c
+
+#endif
diff --git a/gsdx10/GS.cpp b/gsdx10/GS.cpp
new file mode 100644
index 0000000..4a5b36b
--- /dev/null
+++ b/gsdx10/GS.cpp
@@ -0,0 +1,29 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSdx10.h"
+#include "GS.h"
+#include "GSRendererHW.h"
+#include "GSRendererSW.h"
+#include "GSRendererNull.h"
+#include "GSSettingsDlg.h"
+
diff --git a/gsdx10/GSDepthStencil.cpp b/gsdx10/GSDepthStencil.cpp
new file mode 100644
index 0000000..7b977cc
--- /dev/null
+++ b/gsdx10/GSDepthStencil.cpp
@@ -0,0 +1,47 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSTextureCache.h"
+#include "GSRendererHW.h"
+
+GSTextureCache::GSDepthStencil::GSDepthStencil(GSTextureCache* tc) // tc is handed on to the GSSurface base
+	: GSSurface(tc)
+	, m_used(false) // starts out flagged as not used
+{
+}
+
+bool GSTextureCache::GSDepthStencil::Create(int w, int h)
+{
+	// Asks the owning renderer's device for a w x h depth-stencil, passing m_texture along;
+	// the HRESULT is reduced to a plain success flag.
+	const HRESULT result = m_tc->m_renderer->m_dev.CreateDepthStencil(m_texture, w, h);
+
+	return SUCCEEDED(result);
+}
+
+void GSTextureCache::GSDepthStencil::Update() // currently only runs the base class update
+{
+	__super::Update();
+
+	// TODO: dx 10.1 could update ds (nothing depth-stencil specific is done here yet)
+}
+
diff --git a/gsdx10/GSDevice.cpp b/gsdx10/GSDevice.cpp
new file mode 100644
index 0000000..5b75e4b
--- /dev/null
+++ b/gsdx10/GSDevice.cpp
@@ -0,0 +1,775 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSDevice.h"
+#include "resource.h"
+
+GSDevice::GSDevice() // initializes the cached pipeline state that the *Set methods compare against
+	: m_vb(NULL)
+	, m_vb_stride(0)
+	, m_layout(NULL)
+	, m_topology(D3D10_PRIMITIVE_TOPOLOGY_UNDEFINED)
+	, m_vs(NULL)
+	, m_vs_cb(NULL)
+	, m_gs(NULL)
+	, m_ps(NULL)
+	, m_ps_ss(NULL)
+	, m_scissor(0, 0, 0, 0)
+	, m_viewport(0, 0)
+	, m_dss(NULL)
+	, m_sref(0)
+	, m_bs(NULL)
+	, m_bf(-1) // NOTE(review): presumably -1 means "no value set yet"; its use is outside this view
+	, m_rtv(NULL)
+	, m_dsv(NULL)
+{
+	memset(m_ps_srvs, 0, sizeof(m_ps_srvs)); // no shader resource views cached yet
+}
+
+GSDevice::~GSDevice() // intentionally empty: nothing is released explicitly here
+{
+}
+
+bool GSDevice::Create(HWND hWnd)
+{
+	HRESULT hr;
+	// Describe a 1x1 windowed swap chain with two buffers on hWnd; ResetDevice() is what resizes the buffers.
+	DXGI_SWAP_CHAIN_DESC scd;
+
+	memset(&scd, 0, sizeof(scd));
+
+    scd.BufferCount = 2;
+    scd.BufferDesc.Width = 1;
+    scd.BufferDesc.Height = 1;
+    scd.BufferDesc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+    // scd.BufferDesc.RefreshRate.Numerator = 60;
+    // scd.BufferDesc.RefreshRate.Denominator = 1;
+    scd.BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT;
+    scd.OutputWindow = hWnd;
+    scd.SampleDesc.Count = 1;
+    scd.SampleDesc.Quality = 0;
+    scd.Windowed = TRUE;
+
+	UINT flags = 0;
+	// NOTE(review): MSVC debug builds predefine _DEBUG, not DEBUG - confirm the project defines DEBUG, otherwise the debug layer is never requested
+#ifdef DEBUG
+	flags |= D3D10_CREATE_DEVICE_DEBUG;
+#endif
+
+	hr = D3D10CreateDeviceAndSwapChain(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, flags, D3D10_SDK_VERSION, &scd, &m_swapchain, &m_dev);
+	// NOTE(review): this is the only HRESULT that is tested; every later hr in this function is assigned but never checked
+	if(FAILED(hr)) return false;
+
+	D3D10_BUFFER_DESC bd;
+	D3D10_SAMPLER_DESC sd;
+	D3D10_DEPTH_STENCIL_DESC dsd;
+    D3D10_RASTERIZER_DESC rd;
+	D3D10_BLEND_DESC bsd;
+
+	// convert: shaders, vertex buffer, depth-stencil state and blend state stored in m_convert
+
+	D3D10_INPUT_ELEMENT_DESC il_convert[] =
+	{
+		{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0},
+		{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0},
+	};
+
+	hr = CompileShader(&m_convert.vs, IDR_CONVERT_FX, "vs_main", il_convert, countof(il_convert), &m_convert.il);
+	// one pixel shader per slot of m_convert.ps, compiled from the entry points ps_main0, ps_main1, ...
+	for(int i = 0; i < countof(m_convert.ps); i++)
+	{
+		CStringA main;
+		main.Format("ps_main%d", i);
+		hr = CompileShader(&m_convert.ps[i], IDR_CONVERT_FX, main);
+	}
+
+	memset(&bd, 0, sizeof(bd));
+	// vertex buffer sized for four VertexPT1 vertices
+	bd.Usage = D3D10_USAGE_DEFAULT;
+	bd.BindFlags = D3D10_BIND_VERTEX_BUFFER;
+	bd.CPUAccessFlags = 0;
+	bd.MiscFlags = 0;
+	bd.ByteWidth = 4 * sizeof(VertexPT1);
+
+	hr = m_dev->CreateBuffer(&bd, NULL, &m_convert.vb);
+	// depth and stencil tests both disabled
+	memset(&dsd, 0, sizeof(dsd));
+
+	dsd.DepthEnable = false;
+	dsd.StencilEnable = false;
+
+	hr = m_dev->CreateDepthStencilState(&dsd, &m_convert.dss);
+	// blending disabled on target 0, all color channels written
+	memset(&bsd, 0, sizeof(bsd));
+
+	bsd.RenderTargetWriteMask[0] = D3D10_COLOR_WRITE_ENABLE_ALL;
+	bsd.BlendEnable[0] = false;
+
+	hr = m_dev->CreateBlendState(&bsd, &m_convert.bs);
+
+	// merge: shaders, constant buffer and vertex buffer stored in m_merge
+
+	D3D10_INPUT_ELEMENT_DESC il_merge[] =
+	{
+		{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0},
+		{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0},
+		{"TEXCOORD", 1, DXGI_FORMAT_R32G32_FLOAT, 0, 24, D3D10_INPUT_PER_VERTEX_DATA, 0},
+	};
+
+	hr = CompileShader(&m_merge.vs, IDR_MERGE_FX, "vs_main", il_merge, countof(il_merge), &m_merge.il);
+	hr = CompileShader(&m_merge.ps, IDR_MERGE_FX, "ps_main");
+
+	memset(&bd, 0, sizeof(bd));
+	// CPU-writable (dynamic) constant buffer holding one MergeCB
+    bd.ByteWidth = sizeof(MergeCB);
+    bd.Usage = D3D10_USAGE_DYNAMIC; // TODO: default
+    bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER;
+    bd.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE;
+    bd.MiscFlags = 0;
+
+    hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.cb);
+
+	memset(&bd, 0, sizeof(bd));
+	// vertex buffer sized for four VertexPT2 vertices
+	bd.Usage = D3D10_USAGE_DEFAULT;
+	bd.BindFlags = D3D10_BIND_VERTEX_BUFFER;
+	bd.CPUAccessFlags = 0;
+	bd.MiscFlags = 0;
+	bd.ByteWidth = 4 * sizeof(VertexPT2);
+
+	hr = m_dev->CreateBuffer(&bd, NULL, &m_merge.vb);
+
+	// interlace: constant buffer and pixel shaders stored in m_interlace
+
+	memset(&bd, 0, sizeof(bd));
+
+    bd.ByteWidth = sizeof(InterlaceCB);
+    bd.Usage = D3D10_USAGE_DEFAULT;
+    bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER;
+    bd.CPUAccessFlags = 0;
+    bd.MiscFlags = 0;
+
+    hr = m_dev->CreateBuffer(&bd, NULL, &m_interlace.cb);
+	// one pixel shader per slot of m_interlace.ps, compiled from the entry points ps_main0, ps_main1, ...
+	for(int i = 0; i < countof(m_interlace.ps); i++)
+	{
+		CStringA main;
+		main.Format("ps_main%d", i);
+		hr = CompileShader(&m_interlace.ps[i], IDR_INTERLACE_FX, main);
+	}
+
+	// rasterizer state: solid fill, no culling, scissor test enabled; it is bound right after creation
+
+	memset(&rd, 0, sizeof(rd));
+
+	rd.FillMode = D3D10_FILL_SOLID;
+	rd.CullMode = D3D10_CULL_NONE;
+	rd.FrontCounterClockwise = false;
+	rd.DepthBias = false; // NOTE(review): DepthBias is an integer bias value in D3D10_RASTERIZER_DESC; false simply stores 0
+	rd.DepthBiasClamp = 0;
+	rd.SlopeScaledDepthBias = 0;
+	rd.DepthClipEnable = false; // ???
+	rd.ScissorEnable = true;
+	rd.MultisampleEnable = false;
+	rd.AntialiasedLineEnable = false;
+
+	hr = m_dev->CreateRasterizerState(&rd, &m_rs);
+
+	m_dev->RSSetState(m_rs);
+
+	// samplers: two states with identical settings except for the filter (linear vs. point)
+
+	memset(&sd, 0, sizeof(sd));
+
+	sd.AddressU = D3D10_TEXTURE_ADDRESS_CLAMP;
+	sd.AddressV = D3D10_TEXTURE_ADDRESS_CLAMP;
+	sd.AddressW = D3D10_TEXTURE_ADDRESS_CLAMP;
+	sd.MaxLOD = FLT_MAX;
+	sd.MaxAnisotropy = 16; 
+	sd.ComparisonFunc = D3D10_COMPARISON_NEVER;
+
+	sd.Filter = D3D10_FILTER_MIN_MAG_MIP_LINEAR;
+
+	hr = m_dev->CreateSamplerState(&sd, &m_ss_linear);
+
+	sd.Filter = D3D10_FILTER_MIN_MAG_MIP_POINT;
+
+	hr = m_dev->CreateSamplerState(&sd, &m_ss_point);
+
+	// set up the back buffer and the internal textures at a 1x1 size
+
+	ResetDevice(1, 1);
+
+	//
+
+	return true;
+}
+
+void GSDevice::ResetDevice(int w, int h)
+{
+	m_backbuffer = NULL; // the back buffer reference is dropped before ResizeBuffers is called below
+	// replace the internal textures with empty GSTexture2D objects
+	m_tex_1x1 = GSTexture2D();
+	m_tex_merge = GSTexture2D();
+	m_tex_interlace = GSTexture2D();
+	m_tex_deinterlace = GSTexture2D();
+	m_tex_current = GSTexture2D();
+	// forget the cached vertex buffer and input layout that IASet() compares against
+	m_vb = NULL;
+	m_layout = NULL;
+
+	// resize the swap chain to w x h, keeping its buffer count and format, then fetch buffer 0 again
+
+	DXGI_SWAP_CHAIN_DESC scd;
+	memset(&scd, 0, sizeof(scd));
+	m_swapchain->GetDesc(&scd);
+	m_swapchain->ResizeBuffers(scd.BufferCount, w, h, scd.BufferDesc.Format, 0);
+	m_swapchain->GetBuffer(0, __uuidof(ID3D10Texture2D), (void**)&m_backbuffer);
+	// NOTE(review): the results of GetDesc, ResizeBuffers, GetBuffer and CreateTexture are not checked
+	//
+
+	CreateTexture(m_tex_1x1, 1, 1);
+}
+
+void GSDevice::Present() // presents the swap chain
+{
+	m_swapchain->Present(0, 0); // both arguments 0 (sync interval and flags per the DXGI signature); the result is ignored
+}
+
+void GSDevice::EndScene() // unbinds the pixel shader resources and the render targets
+{
+	PSSetShaderResources(NULL, NULL); // clears both shader resource slots (skipped if they are already NULL in the cache)
+
+	OMSetRenderTargets(NULL, NULL);
+}
+
+void GSDevice::IASet(ID3D10Buffer* vb, UINT count, const void* vertices, UINT stride, ID3D10InputLayout* layout, D3D10_PRIMITIVE_TOPOLOGY topology)
+{
+	// Upload the vertices into vb, then touch only the input-assembler state that differs
+	// from what was set last time (tracked in m_vb, m_vb_stride, m_layout, m_topology).
+	const D3D10_BOX region = {0, 0, 0, count * stride, 1, 1};
+
+	m_dev->UpdateSubresource(vb, 0, &region, vertices, 0, 0);
+
+	const bool rebind = m_vb != vb || m_vb_stride != stride;
+
+	if(rebind)
+	{
+		UINT start = 0;
+		m_dev->IASetVertexBuffers(0, 1, &vb, &stride, &start);
+		m_vb = vb;
+		m_vb_stride = stride;
+	}
+
+	if(m_layout != layout)
+	{
+		m_dev->IASetInputLayout(layout);
+		m_layout = layout;
+	}
+
+	if(m_topology != topology)
+	{
+		m_dev->IASetPrimitiveTopology(topology);
+		m_topology = topology;
+	}
+}
+
+void GSDevice::VSSet(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb)
+{
+	// Bind the vertex shader unless the cache says it is already current.
+	if(vs != m_vs)
+	{
+		m_vs = vs;
+		m_dev->VSSetShader(vs);
+	}
+
+	// Same treatment for the constant buffer in slot 0.
+	if(vs_cb != m_vs_cb)
+	{
+		m_vs_cb = vs_cb;
+		m_dev->VSSetConstantBuffers(0, 1, &vs_cb);
+	}
+}
+
+void GSDevice::GSSet(ID3D10GeometryShader* gs)
+{
+	// Nothing to do when the requested geometry shader is the cached one.
+	if(m_gs == gs) return;
+
+	m_dev->GSSetShader(gs);
+
+	m_gs = gs;
+}
+
+void GSDevice::PSSetShaderResources(ID3D10ShaderResourceView* srv0, ID3D10ShaderResourceView* srv1)
+{
+	// Both slots are compared against the cache; if either differs, both are bound again together.
+	const bool unchanged = m_ps_srvs[0] == srv0 && m_ps_srvs[1] == srv1;
+
+	if(unchanged) return;
+
+	m_ps_srvs[0] = srv0;
+	m_ps_srvs[1] = srv1;
+
+	m_dev->PSSetShaderResources(0, 2, m_ps_srvs);
+}
+
+void GSDevice::PSSet(ID3D10PixelShader* ps, ID3D10SamplerState* ss)
+{
+	// Pixel shader: only bound when it differs from the cached one.
+	if(ps != m_ps)
+	{
+		m_ps = ps;
+		m_dev->PSSetShader(ps);
+	}
+
+	// ss = m_ss_point;
+
+	// Sampler slot 0 follows the same caching rule.
+	if(ss != m_ps_ss)
+	{
+		m_ps_ss = ss;
+		m_dev->PSSetSamplers(0, 1, &ss);
+	}
+}
+
+void GSDevice::RSSet(int width, int height, const RECT* scissor)
+{
+	// Viewport: anchored at the origin with the full 0..1 depth range; cached by its size.
+	if(m_viewport.cx != width || m_viewport.cy != height)
+	{
+		D3D10_VIEWPORT full;
+		memset(&full, 0, sizeof(full));
+		full.MaxDepth = 1.0f;
+		full.MinDepth = 0.0f;
+		full.Height = height;
+		full.Width = width;
+		full.TopLeftY = 0;
+		full.TopLeftX = 0;
+
+		m_dev->RSSetViewports(1, &full);
+
+		m_viewport = CSize(width, height);
+	}
+
+	// Scissor: the caller's rectangle, or the whole viewport when none was given.
+	CRect clip(0, 0, width, height);
+
+	if(scissor) clip = *scissor;
+
+	if(m_scissor == clip) return;
+
+	m_dev->RSSetScissorRects(1, &clip);
+	m_scissor = clip;
+}
+
+void GSDevice::OMSet(ID3D10DepthStencilState* dss, UINT sref, ID3D10BlendState* bs, float bf)
+{
+	// Depth-stencil state and stencil reference value are cached as a pair.
+	const bool dssChanged = m_dss != dss || m_sref != sref;
+	if(dssChanged)
+	{
+		m_sref = sref;
+		m_dss = dss;
+		m_dev->OMSetDepthStencilState(dss, sref);
+	}
+
+	// Blend state: bf is replicated to the RGB blend factors, the alpha factor is 0, the sample mask is all ones.
+	if(m_bs != bs || m_bf != bf)
+	{
+		float factors[4] = {bf, bf, bf, 0};
+		m_bf = bf;
+		m_bs = bs;
+		m_dev->OMSetBlendState(bs, factors, 0xffffffff);
+	}
+}
+
+void GSDevice::OMSetRenderTargets(ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv)
+{
+	// Skip the device call when both views match what the cache has recorded.
+	if(m_rtv == rtv && m_dsv == dsv) return;
+
+	m_dsv = dsv;
+	m_rtv = rtv;
+
+	m_dev->OMSetRenderTargets(1, &rtv, dsv);
+}
+
+HRESULT GSDevice::CreateRenderTarget(GSTexture2D& t, int w, int h, DXGI_FORMAT format)
+{
+	return Create(t, w, h, format, D3D10_USAGE_DEFAULT, D3D10_BIND_RENDER_TARGET | D3D10_BIND_SHADER_RESOURCE); // default usage; bindable as render target and as shader resource
+}
+
+HRESULT GSDevice::CreateDepthStencil(GSTexture2D& t, int w, int h, DXGI_FORMAT format)
+{
+	return Create(t, w, h, format, D3D10_USAGE_DEFAULT, D3D10_BIND_DEPTH_STENCIL); // default usage; bindable as depth-stencil only
+}
+
+HRESULT GSDevice::CreateTexture(GSTexture2D& t, int w, int h, DXGI_FORMAT format)
+{
+	return Create(t, w, h, format, D3D10_USAGE_DEFAULT, D3D10_BIND_SHADER_RESOURCE); // default usage; bindable as shader resource only
+}
+
+HRESULT GSDevice::CreateOffscreenPlainSurface(GSTexture2D& t, int w, int h, DXGI_FORMAT format)
+{
+	return Create(t, w, h, format, D3D10_USAGE_STAGING, 0); // staging usage, no bind flags; Create() gives staging textures CPU read and write access
+}
+
+HRESULT GSDevice::Create(GSTexture2D& t, int w, int h, DXGI_FORMAT format, D3D10_USAGE usage, UINT bindFlags)
+{
+	// Whatever t holds on entry goes back into the pool first (and may be handed right back below).
+	Recycle(t);
+
+	// Reuse a pooled texture when usage, bind flags, size and format all match exactly.
+	POSITION pos = m_pool.GetHeadPosition();
+
+	while(pos)
+	{
+		const GSTexture2D& cached = m_pool.GetAt(pos);
+
+		bool same = cached.m_desc.Usage == usage && cached.m_desc.BindFlags == bindFlags;
+		same = same && cached.m_desc.Width == w && cached.m_desc.Height == h && cached.m_desc.Format == format;
+
+		if(same)
+		{
+			t = cached;
+			m_pool.RemoveAt(pos);
+			return S_OK;
+		}
+
+		m_pool.GetNext(pos);
+	}
+
+	// Nothing pooled fits: describe a 2D texture with one mip level, one array slice and one sample.
+	D3D10_TEXTURE2D_DESC desc;
+	memset(&desc, 0, sizeof(desc));
+
+	desc.Usage = usage;
+	desc.BindFlags = bindFlags;
+	desc.Format = format;
+	desc.Width = w;
+	desc.Height = h;
+	desc.ArraySize = 1;
+	desc.MipLevels = 1;
+	desc.SampleDesc.Quality = 0;
+	desc.SampleDesc.Count = 1;
+
+	// CPU access: read + write for staging, write for dynamic, none (already 0) otherwise.
+	if(usage == D3D10_USAGE_STAGING) desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ | D3D10_CPU_ACCESS_WRITE;
+	else if(usage == D3D10_USAGE_DYNAMIC) desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE;
+
+	CComPtr<ID3D10Texture2D> created;
+	HRESULT hr = m_dev->CreateTexture2D(&desc, NULL, &created);
+
+	if(FAILED(hr)) return hr;
+
+	t.m_dev = m_dev;
+	t.m_texture = created.Detach();
+	t.m_desc = desc;
+	return hr;
+}
+
+void GSDevice::Recycle(GSTexture2D& t)
+{
+	// An empty wrapper has nothing to give back.
+	if(!t.m_texture) return;
+
+	// Recycled textures are added at the head of the pool ...
+	m_pool.AddHead(t);
+
+	// ... and while more than 200 are pooled, entries are dropped from the tail.
+	while(m_pool.GetCount() > 200)
+		m_pool.RemoveTail();
+
+	// Leave the caller's wrapper empty.
+	t = GSTexture2D();
+}
+
+bool GSDevice::SaveCurrent(LPCTSTR fn)
+{
+	return SUCCEEDED(D3DX10SaveTextureToFile(m_tex_current, D3DX10_IFF_BMP, fn)); // saves m_tex_current to fn as a BMP; true on success
+}
+
+bool GSDevice::SaveToFileD32S8X24(ID3D10Texture2D* ds, LPCTSTR fn)
+{
+	// Dumps the depth values of a D32_FLOAT_S8X24 depth-stencil texture to fn as a BMP.
+	// Returns false if ds is NULL or if any create, map or save step fails.
+	if(!ds) return false;
+
+	D3D10_TEXTURE2D_DESC desc;
+	memset(&desc, 0, sizeof(desc));
+	ds->GetDesc(&desc);
+
+	// CPU-readable staging copy of the depth-stencil data.
+	desc.Usage = D3D10_USAGE_STAGING;
+	desc.BindFlags = 0;
+	desc.CPUAccessFlags = D3D10_CPU_ACCESS_READ;
+
+	CComPtr<ID3D10Texture2D> src, dst;
+
+	if(FAILED(m_dev->CreateTexture2D(&desc, NULL, &src))) return false;
+
+	m_dev->CopyResource(src, ds);
+
+	// CPU-writable staging texture that receives the converted 8-bit pixels.
+	desc.Format = DXGI_FORMAT_R8G8B8A8_UNORM;
+	desc.CPUAccessFlags = D3D10_CPU_ACCESS_WRITE;
+
+	if(FAILED(m_dev->CreateTexture2D(&desc, NULL, &dst))) return false;
+
+	D3D10_MAPPED_TEXTURE2D sm, dm;
+
+	if(FAILED(src->Map(0, D3D10_MAP_READ, 0, &sm))) return false;
+
+	if(FAILED(dst->Map(0, D3D10_MAP_WRITE, 0, &dm))) {src->Unmap(0); return false;}
+
+	BYTE* s = (BYTE*)sm.pData;
+	BYTE* d = (BYTE*)dm.pData;
+
+	for(UINT y = 0; y < desc.Height; y++, s += sm.RowPitch, d += dm.RowPitch)
+	{
+		const float* sf = (const float*)s; // source pixels are read as pairs of 32-bit words; the first of each pair is the float depth
+		DWORD* dd = (DWORD*)d;
+
+		for(UINT x = 0; x < desc.Width; x++)
+		{
+			DWORD b = (BYTE)(sf[x*2] * 255); // DWORD so that b << 24 never shifts into a sign bit
+			// NOTE(review): in R8G8B8A8 the low byte is red, so this stores R=0xff and G=B=A=depth - confirm that is intended.
+			dd[x] = (b << 24) | (b << 16) | (b << 8) | 0xff;
+		}
+	}
+
+	src->Unmap(0);
+	dst->Unmap(0);
+	return SUCCEEDED(D3DX10SaveTextureToFile(dst, D3DX10_IFF_BMP, fn));
+}
+
+void GSDevice::StretchRect(GSTexture2D& st, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear)
+{
+	StretchRect(st, D3DXVECTOR4(0, 0, 1, 1), dt, dr, m_convert.ps[0], linear); // source rectangle (0,0)-(1,1) in texture coordinates, converter pixel shader 0
+}
+
+void GSDevice::StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear)
+{
+	StretchRect(st, sr, dt, dr, m_convert.ps[0], linear); // same as the full overload with converter pixel shader 0
+}
+
+void GSDevice::StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, ID3D10PixelShader* ps, bool linear)
+{
+	// Draws st (sub-rectangle sr, in texture coordinates) into dt (rectangle dr, in dt pixels) using ps.
+	// Depth-stencil/blend state, vertex buffer, layout and vertex shader all come from m_convert.
+
+	// Output merger: the converter's depth-stencil and blend states, dt as the only target.
+	OMSet(m_convert.dss, 0, m_convert.bs, 0);
+
+	OMSetRenderTargets(dt, NULL);
+
+	// Input assembler: dr is mapped from dt pixels (y down) to clip space (-1..1, y up).
+	float x0 = dr.x * 2 / dt.m_desc.Width - 1.0f;
+	float x1 = dr.z * 2 / dt.m_desc.Width - 1.0f;
+	float y0 = 1.0f - dr.y * 2 / dt.m_desc.Height;
+	float y1 = 1.0f - dr.w * 2 / dt.m_desc.Height;
+
+	// One vertex per corner, in triangle strip order, each paired with the matching corner of sr.
+	VertexPT1 quad[] =
+	{
+		{x0, y0, 0.5f, 1.0f, sr.x, sr.y},
+		{x1, y0, 0.5f, 1.0f, sr.z, sr.y},
+		{x0, y1, 0.5f, 1.0f, sr.x, sr.w},
+		{x1, y1, 0.5f, 1.0f, sr.z, sr.w},
+	};
+
+	IASet(m_convert.vb, 4, quad, m_convert.il, D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+	// Shader stages: converter vertex shader without constant buffer, no geometry shader.
+	VSSet(m_convert.vs, NULL);
+
+	GSSet(NULL);
+
+	// Pixel stage: st in slot 0, slot 1 empty, and the linear or point sampler as requested.
+	PSSetShaderResources(st, NULL);
+
+	ID3D10SamplerState* sampler = m_ss_point;
+
+	if(linear) sampler = m_ss_linear;
+
+	PSSet(ps, sampler);
+
+	// Rasterizer: viewport and scissor both span all of dt.
+	RSSet(dt.m_desc.Width, dt.m_desc.Height);
+
+	// Draw the quad, then unbind the shader resources and the render target.
+	m_dev->Draw(4, 0);
+
+	EndScene();
+}
+
+void GSDevice::Interlace(GSTexture2D& st, GSTexture2D& dt, int shader, bool linear, float yoffset)
+{
+	// One full pass of m_interlace.ps[shader] from st into dt; dt's height parameterizes the shader constants.
+	InterlaceCB cb;
+	cb.ZrH = D3DXVECTOR2(0, 1.0f / dt.m_desc.Height);
+	cb.hH = (float)dt.m_desc.Height / 2;
+	cb._pad = 0; // the whole struct is uploaded, so do not leave the padding uninitialized
+
+	m_dev->UpdateSubresource(m_interlace.cb, 0, NULL, &cb, 0, 0);
+	m_dev->PSSetConstantBuffers(0, 1, &m_interlace.cb.p);
+
+	// Whole source onto a destination rectangle of dt's size, shifted down by yoffset pixels.
+	D3DXVECTOR4 sr(0, 0, 1, 1);
+	D3DXVECTOR4 dr(0, yoffset, (float)dt.m_desc.Width, (float)dt.m_desc.Height + yoffset);
+	StretchRect(st, sr, dt, dr, m_interlace.ps[shader], linear);
+}
+
+ID3D10Texture2D* GSDevice::Interlace(GSTexture2D& st, CSize ds, int field, int mode, float yoffset)
+{
+	// Runs the deinterlacing passes on st and returns the texture that holds the result:
+	// mode 0 = weave, 1 = bob, 2 = blend (weave, then a blend pass); other modes return st as is.
+	// Returns NULL if a work texture cannot be created, and in blend mode whenever field == 0.
+	ID3D10Texture2D* t = st;
+
+	if(!m_tex_interlace || m_tex_interlace.m_desc.Width != ds.cx || m_tex_interlace.m_desc.Height != ds.cy)
+	{
+		if(FAILED(CreateRenderTarget(m_tex_interlace, ds.cx, ds.cy))) return NULL;
+	}
+
+	if(mode == 0 || mode == 2) // weave or blend
+	{
+		// weave first; the pass reads the st argument rather than a hard-coded member
+		Interlace(st, m_tex_interlace, field, false);
+		t = m_tex_interlace;
+
+		if(mode == 2)
+		{
+			// blend: second pass from the woven frame into m_tex_deinterlace (shader 2)
+			if(!m_tex_deinterlace || m_tex_deinterlace.m_desc.Width != ds.cx || m_tex_deinterlace.m_desc.Height != ds.cy)
+			{
+				if(FAILED(CreateRenderTarget(m_tex_deinterlace, ds.cx, ds.cy))) return NULL;
+			}
+
+			if(field == 0) return NULL;
+
+			Interlace(m_tex_interlace, m_tex_deinterlace, 2, false);
+
+			t = m_tex_deinterlace;
+		}
+	}
+	else if(mode == 1) // bob
+	{
+		// bob: one linearly filtered pass with shader 3, shifted down by yoffset * field
+		Interlace(st, m_tex_interlace, 3, true, yoffset * field);
+		t = m_tex_interlace;
+	}
+
+	return t;
+}
+
+HRESULT GSDevice::CompileShader(ID3D10VertexShader** ps, UINT id, LPCSTR entry, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** pl, D3D10_SHADER_MACRO* macro)
+{
+	// Compiles entry point `entry` of shader resource `id` as vs_4_0, then creates the
+	// vertex shader (*ps) and an input layout (*pl) from the same compiled bytecode.
+	// Returns the first failing HRESULT, otherwise the result of CreateInputLayout.
+
+	CComPtr<ID3D10Blob> code, messages;
+
+	HRESULT hr = D3DX10CompileFromResource(AfxGetInstanceHandle(), MAKEINTRESOURCE(id), NULL, macro, NULL, entry, "vs_4_0", 0, 0, NULL, &code, &messages, NULL);
+
+	// Compiler messages are traced whenever present, not only on failure.
+	if(messages)
+	{
+		TRACE(_T("%s\n"), CString((LPCSTR)messages->GetBufferPointer()));
+	}
+
+	if(FAILED(hr))
+	{
+		return hr;
+	}
+
+	LPVOID bytecode = code->GetBufferPointer();
+	SIZE_T length = code->GetBufferSize();
+
+	hr = m_dev->CreateVertexShader((DWORD*)bytecode, length, ps);
+
+	if(FAILED(hr))
+	{
+		return hr;
+	}
+
+	// Note that *ps stays set if the layout creation below fails.
+	return m_dev->CreateInputLayout(layout, count, bytecode, length, pl);
+}
+
+HRESULT GSDevice::CompileShader(ID3D10GeometryShader** gs, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro)
+{
+	// Compiles entry point `entry` of shader resource `id` as gs_4_0 and creates the
+	// geometry shader object in *gs; `macro` is passed through to the compiler.
+	// Returns the compiler's HRESULT on failure, otherwise that of CreateGeometryShader.
+
+	CComPtr<ID3D10Blob> code, messages;
+
+	HRESULT hr = D3DX10CompileFromResource(AfxGetInstanceHandle(), MAKEINTRESOURCE(id), NULL, macro, NULL, entry, "gs_4_0", 0, 0, NULL, &code, &messages, NULL);
+
+	// Compiler messages are traced whenever present, not only on failure.
+	if(messages)
+	{
+		TRACE(_T("%s\n"), CString((LPCSTR)messages->GetBufferPointer()));
+	}
+
+	if(FAILED(hr))
+	{
+		return hr;
+	}
+
+	LPVOID bytecode = code->GetBufferPointer();
+	SIZE_T length = code->GetBufferSize();
+
+	// Hand the compiled blob to the device.
+	return m_dev->CreateGeometryShader((DWORD*)bytecode, length, gs);
+}
+
+HRESULT GSDevice::CompileShader(ID3D10PixelShader** ps, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro)
+{
+	// Compiles entry point `entry` of shader resource `id` as ps_4_0 and creates the
+	// pixel shader object in *ps; `macro` is passed through to the compiler.
+	// Returns the compiler's HRESULT on failure, otherwise that of CreatePixelShader.
+
+	CComPtr<ID3D10Blob> code, messages;
+
+	HRESULT hr = D3DX10CompileFromResource(AfxGetInstanceHandle(), MAKEINTRESOURCE(id), NULL, macro, NULL, entry, "ps_4_0", 0, 0, NULL, &code, &messages, NULL);
+
+	// Compiler messages are traced whenever present, not only on failure.
+	if(messages)
+	{
+		TRACE(_T("%s\n"), CString((LPCSTR)messages->GetBufferPointer()));
+	}
+
+	if(FAILED(hr))
+	{
+		return hr;
+	}
+
+	LPVOID bytecode = code->GetBufferPointer();
+	SIZE_T length = code->GetBufferSize();
+
+	// Hand the compiled blob to the device.
+	return m_dev->CreatePixelShader((DWORD*)bytecode, length, ps);
+}
diff --git a/gsdx10/GSDevice.h b/gsdx10/GSDevice.h
new file mode 100644
index 0000000..a2743fe
--- /dev/null
+++ b/gsdx10/GSDevice.h
@@ -0,0 +1,187 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSTexture2D.h"
+
+#pragma pack(push, 1)
+
+struct MergeCB // pixel shader constants written by GSRenderer::Merge
+{
+	D3DXVECTOR4 BGColor; // BGCOLOR R/G/B divided by 255, w = 0
+	float Alpha; // PMODE->ALP / 255
+	float EN1; // IsEnabled(0) converted to float
+	float EN2; // IsEnabled(1) converted to float
+	int MMOD; // !!PMODE->MMOD
+	int SLBG; // !!PMODE->SLBG
+	float Padding[3]; // never written by Merge; presumably pads the struct to a 16-byte multiple - TODO confirm
+};
+
+struct InterlaceCB // pixel shader constants filled in by GSDevice::Interlace
+{
+	D3DXVECTOR2 ZrH; // (0, 1 / destination height)
+	float hH; // destination height / 2
+	float _pad; // not assigned a meaningful value by the visible code
+};
+
+struct VertexPT1 // position plus one texture coordinate pair (used by GSDevice::StretchRect)
+{
+	float x, y, z, w; // position; StretchRect fills x/y in the -1..1 range with z = 0.5 and w = 1
+	float tu, tv; // texture coordinate
+};
+
+struct VertexPT2 // position plus two texture coordinate pairs (used by GSRenderer::Merge)
+{
+	float x, y, z, w; // position; Merge uses the corners of the -1..1 square with z = 0.5 and w = 1
+	float tu1, tv1; // coordinate into the first source texture
+	float tu2, tv2; // coordinate into the second source texture
+};
+
+#pragma pack(pop)
+
+class GSDevice
+{
+	// texture cache: textures given to Recycle() wait here for reuse by Create(); Recycle() caps it at 200 entries
+
+	CAtlList<GSTexture2D> m_pool;
+
+	// state cache: the values the *Set() methods last sent to the device, used to skip redundant calls
+
+	ID3D10Buffer* m_vb;
+	UINT m_vb_stride;
+
+	ID3D10InputLayout* m_layout;
+	D3D10_PRIMITIVE_TOPOLOGY m_topology;
+
+	ID3D10VertexShader* m_vs;
+	ID3D10Buffer* m_vs_cb;
+
+	ID3D10GeometryShader* m_gs;
+
+	ID3D10ShaderResourceView* m_ps_srvs[2];
+
+	ID3D10PixelShader* m_ps;
+	ID3D10SamplerState* m_ps_ss;
+
+	CSize m_viewport;
+	CRect m_scissor;
+
+	ID3D10DepthStencilState* m_dss;
+	UINT m_sref;
+	ID3D10BlendState* m_bs;
+	float m_bf;
+	ID3D10RenderTargetView* m_rtv;
+	ID3D10DepthStencilView* m_dsv;
+
+	// one pass of m_interlace.ps[shader] from st into dt, used by the public Interlace()
+
+	void Interlace(GSTexture2D& st, GSTexture2D& dt, int shader, bool linear, float yoffset = 0);
+
+public: // TODO
+	CComPtr<ID3D10Device> m_dev;
+	CComPtr<IDXGISwapChain> m_swapchain;
+	CComPtr<ID3D10Texture2D> m_backbuffer; // buffer 0 of the swap chain, fetched again in ResetDevice()
+	CComPtr<ID3D10Texture2D> m_tex_current; // the frame GSRenderer::Present() stretches onto the back buffer
+
+	GSTexture2D m_tex_merge; // output of GSRenderer::Merge
+	GSTexture2D m_tex_interlace; // output of the weave / bob pass
+	GSTexture2D m_tex_deinterlace; // output of the blend pass
+	GSTexture2D m_tex_1x1; // 1x1 texture created in ResetDevice(); Merge binds it in place of a missing source
+
+	CComPtr<ID3D10SamplerState> m_ss_linear;
+	CComPtr<ID3D10SamplerState> m_ss_point;
+
+	CComPtr<ID3D10RasterizerState> m_rs;
+
+	struct // resources used by StretchRect()
+	{
+		CComPtr<ID3D10Buffer> vb;
+		CComPtr<ID3D10InputLayout> il;
+		CComPtr<ID3D10VertexShader> vs;
+		CComPtr<ID3D10PixelShader> ps[4];
+		CComPtr<ID3D10DepthStencilState> dss;
+		CComPtr<ID3D10BlendState> bs;
+	} m_convert;
+
+	struct // resources used by GSRenderer::Merge
+	{
+		CComPtr<ID3D10Buffer> vb;
+		CComPtr<ID3D10InputLayout> il;
+		CComPtr<ID3D10VertexShader> vs;
+		CComPtr<ID3D10PixelShader> ps;
+		CComPtr<ID3D10Buffer> cb; // holds a MergeCB
+	} m_merge;
+
+	struct // resources used by Interlace()
+	{
+		CComPtr<ID3D10PixelShader> ps[4]; // Interlace() uses the field number (weave), 2 (blend) and 3 (bob) as index
+		CComPtr<ID3D10Buffer> cb; // holds an InterlaceCB
+	} m_interlace;
+
+public:
+	GSDevice();
+	virtual ~GSDevice();
+
+	bool Create(HWND hWnd);
+
+	ID3D10Device* operator->() {return m_dev;} // lets callers write dev->D3DMethod(...)
+	operator ID3D10Device*() {return m_dev;}
+
+	void ResetDevice(int w, int h); // releases the intermediate textures and resizes the swap chain to w x h
+	void EndScene(); // unbinds pixel shader resources and render targets
+	void Present();
+
+	// cached state setters: each one only calls the device when its arguments differ from the cached values
+	void IASet(ID3D10Buffer* vb, UINT count, const void* vertices, UINT stride, ID3D10InputLayout* layout, D3D10_PRIMITIVE_TOPOLOGY topology); // also uploads the vertices on every call
+	void VSSet(ID3D10VertexShader* vs, ID3D10Buffer* vs_cb);
+	void GSSet(ID3D10GeometryShader* gs);
+	void PSSetShaderResources(ID3D10ShaderResourceView* srv0, ID3D10ShaderResourceView* srv1);
+	void PSSet(ID3D10PixelShader* ps, ID3D10SamplerState* ss);
+	void RSSet(int width, int height, const RECT* scissor = NULL); // NULL scissor means the full width x height
+	void OMSet(ID3D10DepthStencilState* dss, UINT sref, ID3D10BlendState* bs, float bf);
+	void OMSetRenderTargets(ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv);
+
+	template<class T> void IASet(ID3D10Buffer* vb, UINT count, T* vertices, ID3D10InputLayout* layout, D3D10_PRIMITIVE_TOPOLOGY topology)
+	{
+		IASet(vb, count, vertices, sizeof(T), layout, topology); // stride is taken from the vertex type
+	}
+
+	// texture creation: all of these go through Create(), which first recycles t and then looks in the pool
+	HRESULT CreateRenderTarget(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM);
+	HRESULT CreateDepthStencil(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_D32_FLOAT_S8X24_UINT);
+	HRESULT CreateTexture(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM);
+	HRESULT CreateOffscreenPlainSurface(GSTexture2D& t, int w, int h, DXGI_FORMAT format = DXGI_FORMAT_R8G8B8A8_UNORM);
+	HRESULT Create(GSTexture2D& t, int w, int h, DXGI_FORMAT format, D3D10_USAGE usage, UINT bind);
+
+	void Recycle(GSTexture2D& t); // moves t's texture into the pool and leaves t empty
+
+	bool SaveCurrent(LPCTSTR fn); // saves m_tex_current as a BMP
+	bool SaveToFileD32S8X24(ID3D10Texture2D* ds, LPCTSTR fn);
+
+	void StretchRect(GSTexture2D& st, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear = true);
+	void StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, bool linear = true);
+	void StretchRect(GSTexture2D& st, const D3DXVECTOR4& sr, GSTexture2D& dt, const D3DXVECTOR4& dr, ID3D10PixelShader* ps, bool linear = true);
+
+	ID3D10Texture2D* Interlace(GSTexture2D& st, CSize ds, int field, int mode, float yoffset); // mode 0 = weave, 1 = bob, 2 = blend; may return NULL
+
+	HRESULT CompileShader(ID3D10VertexShader** ps, UINT id, LPCSTR entry, D3D10_INPUT_ELEMENT_DESC* layout, int count, ID3D10InputLayout** pl, D3D10_SHADER_MACRO* macro = NULL);
+	HRESULT CompileShader(ID3D10GeometryShader** gs, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro = NULL);
+	HRESULT CompileShader(ID3D10PixelShader** ps, UINT id, LPCSTR entry, D3D10_SHADER_MACRO* macro = NULL);
+};
diff --git a/gsdx10/GSRenderTarget.cpp b/gsdx10/GSRenderTarget.cpp
new file mode 100644
index 0000000..6ba4b2b
--- /dev/null
+++ b/gsdx10/GSRenderTarget.cpp
@@ -0,0 +1,223 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSTextureCache.h"
+#include "GSRendererHW.h"
+
+GSTextureCache::GSRenderTarget::GSRenderTarget(GSTextureCache* tc)
+	: GSSurface(tc)
+	, m_used(true) // a new render target starts out flagged as used
+{
+}
+
+bool GSTextureCache::GSRenderTarget::Create(int w, int h)
+{
+	// Obtain a w x h render target in the default format from the renderer's device.
+	GSDevice& dev = m_tc->m_renderer->m_dev;
+
+	if(FAILED(dev.CreateRenderTarget(m_texture, w, h))) return false;
+
+	// Clear it to transparent black.
+	const float black[4] = {0, 0, 0, 0};
+
+	dev->ClearRenderTargetView(m_texture, black);
+
+	return true;
+}
+
+void GSTextureCache::GSRenderTarget::Update()
+{
+	__super::Update();
+
+	// FIXME: the union of the rects may also update wrong parts of the render target (but a lot faster :)
+
+	CRect r = m_dirty.GetDirtyRect(m_TEX0);
+
+	m_dirty.RemoveAll();
+
+	if(r.IsRectEmpty()) return;
+
+	// s->m_perfmon.Put(GSPerfMon::WriteRT, 1);
+
+	HRESULT hr;
+
+	// the scratch buffer below is 1024 x 1024 pixels, so the dirty rect is cut off at 1024 (only right/bottom are clamped)
+	if(r.right > 1024) {ASSERT(0); r.right = 1024;}
+	if(r.bottom > 1024) {ASSERT(0); r.bottom = 1024;}
+
+	int w = r.Width();
+	int h = r.Height();
+
+	// allocated once on first use and never freed; 4 bytes per pixel, 16-byte aligned
+	static BYTE* buff = (BYTE*)_aligned_malloc(1024 * 1024 * 4, 16);
+	static int pitch = 1024 * 4;
+
+	GIFRegTEXA TEXA;
+
+	TEXA.AEM = 1;
+	TEXA.TA0 = 0;
+	TEXA.TA1 = 0x80;
+
+	GIFRegCLAMP CLAMP;
+
+	CLAMP.WMS = 0;
+	CLAMP.WMT = 0;
+
+	// read the dirty area from local memory into buff using the fixed TEXA/CLAMP values above
+	m_tc->m_renderer->m_mem.ReadTexture(r, buff, pitch, m_TEX0, TEXA, CLAMP);
+
+	// s->m_perfmon.Put(GSPerfMon::Unswizzle, w * h * 4);
+
+	GSTexture2D texture;
+
+	hr = m_tc->m_renderer->m_dev.CreateTexture(texture, w, h);
+
+	if(FAILED(hr)) return;
+
+	D3D10_BOX box = {0, 0, 0, w, h, 1};
+
+	m_tc->m_renderer->m_dev->UpdateSubresource(texture, 0, &box, buff, pitch, 0);
+
+	// destination rectangle = dirty rect scaled by this render target's m_scale
+	D3DXVECTOR4 dst(m_scale.x * r.left, m_scale.y * r.top, m_scale.x * r.right, m_scale.y * r.bottom);
+
+	m_tc->m_renderer->m_dev.StretchRect(texture, m_texture, dst);
+
+	m_tc->m_renderer->m_dev.Recycle(texture);
+}
+
+void GSTextureCache::GSRenderTarget::Read(CRect r)
+{
+	HRESULT hr;
+	// Copies area r of this render target back into local memory; only the 32/24/16/16S color formats are handled.
+	if(m_TEX0.PSM != PSM_PSMCT32 
+	&& m_TEX0.PSM != PSM_PSMCT24
+	&& m_TEX0.PSM != PSM_PSMCT16
+	&& m_TEX0.PSM != PSM_PSMCT16S)
+	{
+		//ASSERT(0);
+		return;
+	}
+
+	TRACE(_T("GSRenderTarget::Read %d,%d - %d,%d (%08x)\n"), r.left, r.top, r.right, r.bottom, m_TEX0.TBP0);
+
+	// m_tc->m_renderer->m_perfmon.Put(GSPerfMon::ReadRT, 1);
+
+	// source rectangle: r scaled by m_scale and normalized by the texture size
+
+	float left = m_scale.x * r.left / m_texture.m_desc.Width;
+	float top = m_scale.y * r.top / m_texture.m_desc.Height;
+	float right = m_scale.x * r.right / m_texture.m_desc.Width;
+	float bottom = m_scale.y * r.bottom / m_texture.m_desc.Height;
+
+	D3DXVECTOR4 src(left, top, right, bottom);
+	D3DXVECTOR4 dst(0, 0, r.Width(), r.Height());
+	// the 16-bit formats are converted by shader 1 into an R16_UINT target, the others are copied as RGBA8 by shader 0
+	DXGI_FORMAT format = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? DXGI_FORMAT_R16_UINT : DXGI_FORMAT_R8G8B8A8_UNORM;
+
+	int shader = m_TEX0.PSM == PSM_PSMCT16 || m_TEX0.PSM == PSM_PSMCT16S ? 1 : 0;
+
+	GSTexture2D rt;
+
+	hr = m_tc->m_renderer->m_dev.CreateRenderTarget(rt, r.Width(), r.Height(), format);
+	if(FAILED(hr)) return; // nothing to draw into
+	m_tc->m_renderer->m_dev.StretchRect(m_texture, src, rt, dst, m_tc->m_renderer->m_dev.m_convert.ps[shader]);
+
+	GSTexture2D offscreen;
+
+	hr = m_tc->m_renderer->m_dev.CreateOffscreenPlainSurface(offscreen, r.Width(), r.Height(), format);
+	if(FAILED(hr)) {m_tc->m_renderer->m_dev.Recycle(rt); return;} // no staging texture to copy into
+	m_tc->m_renderer->m_dev->CopyResource(offscreen, rt);
+
+	m_tc->m_renderer->m_dev.Recycle(rt);
+
+	D3D10_MAPPED_TEXTURE2D map;
+
+	if(SUCCEEDED(hr) && SUCCEEDED(offscreen->Map(0, D3D10_MAP_READ, 0, &map)))
+	{
+		// TODO: block level write
+
+		DWORD bp = m_TEX0.TBP0;
+		DWORD bw = m_TEX0.TBW;
+
+		GSLocalMemory::pixelAddress pa = GSLocalMemory::m_psm[m_TEX0.PSM].pa;
+
+		BYTE* bits = (BYTE*)map.pData;
+
+		if(m_TEX0.PSM == PSM_PSMCT32)
+		{
+			for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch)
+			{
+				DWORD addr = pa(0, y, bp, bw);
+				int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
+
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+				{
+					m_tc->m_renderer->m_mem.writePixel32(addr + offset[x], ((DWORD*)bits)[i]);
+				}
+			}
+		}
+		else if(m_TEX0.PSM == PSM_PSMCT24)
+		{
+			for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch)
+			{
+				DWORD addr = pa(0, y, bp, bw);
+				int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
+
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+				{
+					m_tc->m_renderer->m_mem.writePixel24(addr + offset[x], ((DWORD*)bits)[i]);
+				}
+			}
+		}
+		else if(m_TEX0.PSM == PSM_PSMCT16)
+		{
+			for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch)
+			{
+				DWORD addr = pa(0, y, bp, bw);
+				int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
+
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+				{
+					m_tc->m_renderer->m_mem.writePixel16(addr + offset[x], ((WORD*)bits)[i]);
+				}
+			}
+		}
+		else if(m_TEX0.PSM == PSM_PSMCT16S)
+		{
+			for(int y = r.top; y < r.bottom; y++, bits += map.RowPitch)
+			{
+				DWORD addr = pa(0, y, bp, bw);
+				int* offset = GSLocalMemory::m_psm[m_TEX0.PSM].rowOffset[y & 7];
+
+				for(int x = r.left, i = 0; x < r.right; x++, i++)
+				{
+					m_tc->m_renderer->m_mem.writePixel16S(addr + offset[x], ((WORD*)bits)[i]);
+				}
+			}
+		}
+		else
+		{
+			ASSERT(0);
+		}
+
+		offscreen->Unmap(0);
+	}
+
+	m_tc->m_renderer->m_dev.Recycle(offscreen);
+}
diff --git a/gsdx10/GSRenderer.cpp b/gsdx10/GSRenderer.cpp
new file mode 100644
index 0000000..62145e0
--- /dev/null
+++ b/gsdx10/GSRenderer.cpp
@@ -0,0 +1,466 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSRenderer.h"
+#include "GSSettingsDlg.h"
+
+BEGIN_MESSAGE_MAP(GSRenderer, CWnd)
+	ON_WM_CLOSE() // WM_CLOSE is handled by GSRenderer::OnClose
+END_MESSAGE_MAP()
+
+GSRenderer::GSRenderer(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+	: GSState(base, mt, irq, nloophack)
+	, m_osd(true)
+	, m_field(0)
+	, m_crc(0)
+	, m_options(0)
+	, m_frameskip(0)
+{
+	m_interlace = AfxGetApp()->GetProfileInt(_T("Settings"), _T("interlace"), 0); // NOTE(review): not range-checked here, yet Present() indexes g_interlace[] with it
+	m_aspectratio = AfxGetApp()->GetProfileInt(_T("Settings"), _T("aspectratio"), 1); // NOTE(review): not range-checked here, yet Present() indexes a 3-entry table with it
+	m_filter = AfxGetApp()->GetProfileInt(_T("Settings"), _T("filter"), 1);
+	m_vsync = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("vsync"), FALSE); // read here; GSDevice::Present() always passes sync interval 0
+}
+
+GSRenderer::~GSRenderer()
+{
+	DestroyWindow(); // tear down the window created in Create()
+}
+
+bool GSRenderer::Create(LPCTSTR title)
+{
+	// Initial window: a third of the desktop width wide, a quarter of the desktop width high (4:3), centered.
+	CRect desktop;
+	GetDesktopWindow()->GetWindowRect(desktop);
+
+	const int cx = desktop.Width() / 3;
+	const int cy = desktop.Width() / 4;
+	const CPoint origin = desktop.CenterPoint() - CSize(cx / 2, cy / 2);
+	const CRect bounds(origin, CSize(cx, cy));
+
+	LPCTSTR cls = AfxRegisterWndClass(CS_VREDRAW|CS_HREDRAW|CS_DBLCLKS, AfxGetApp()->LoadStandardCursor(IDC_ARROW), 0, 0);
+
+	// The window comes first, then the device on top of it; either failure aborts.
+	const bool ok = CreateEx(0, cls, title, WS_OVERLAPPEDWINDOW, bounds, NULL, 0) && m_dev.Create(m_hWnd);
+
+	if(!ok)
+	{
+		return false;
+	}
+
+	// Window and device are up.
+	Reset();
+
+	return true;
+}
+
+void GSRenderer::Show()
+{
+	SetWindowPos(&wndTop, 0, 0, 0, 0, SWP_NOMOVE|SWP_NOSIZE); // raise in the Z order without moving or resizing
+	SetForegroundWindow();
+	ShowWindow(SW_SHOWNORMAL);
+}
+
+void GSRenderer::Hide()
+{
+	ShowWindow(SW_HIDE); // the window is hidden, not destroyed
+}
+
+void GSRenderer::OnClose()
+{
+	Hide(); // closing only hides the window
+
+	PostMessage(WM_QUIT); // posted to this window; VSync()'s message loop stops once it retrieves WM_QUIT
+}
+
+void GSRenderer::VSync(int field)
+{
+	m_field = !!field;
+
+	// Drain the message queue; the loop also stops once WM_QUIT has been retrieved.
+	MSG msg;
+
+	memset(&msg, 0, sizeof(msg));
+
+	while(msg.message != WM_QUIT && PeekMessage(&msg, NULL, 0, 0, PM_REMOVE))
+	{
+		if(msg.message == WM_KEYDOWN)
+		{
+			// Holding shift makes F5/F6 cycle backwards.
+			const bool shift = (::GetAsyncKeyState(VK_SHIFT) & 0x80000000) != 0;
+			const int step = shift ? -1 : 1;
+
+			switch(msg.wParam)
+			{
+			case VK_F5: // cycle through the 7 interlace settings
+				m_interlace = (m_interlace + 7 + step) % 7;
+				continue;
+			case VK_F6: // cycle through the 3 aspect ratio settings
+				m_aspectratio = (m_aspectratio + 3 + step) % 3;
+				continue;
+			case VK_F7: // restore the plain title and toggle the statistics display
+				SetWindowText(_T("PCSX2"));
+				m_osd = !m_osd;
+				continue;
+			}
+		}
+
+		// Hotkeys are swallowed above (continue); everything else is dispatched normally.
+		TranslateMessage(&msg);
+		DispatchMessage(&msg);
+	}
+
+	// Per-frame work, in this order.
+	Flush();
+
+	Flip();
+
+	Present();
+}
+
+bool GSRenderer::MakeSnapshot(char* path)
+{
+	CString fn; // path is used as a plain prefix, followed by "gsdx10_", a YYYYMMDDHHMMSS time stamp and ".bmp"
+	fn.Format(_T("%sgsdx10_%s.bmp"), CString(path), CTime::GetCurrentTime().Format(_T("%Y%m%d%H%M%S")));
+	return m_dev.SaveCurrent(fn);
+}
+
+void GSRenderer::SetGameCRC(int crc, int options)
+{
+	m_crc = crc;
+	m_options = options;
+	// when the "nloophack" setting is 2 (also its default here), m_nloophack is switched on for the CRCs listed below
+	if(AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2) == 2)
+	{
+		switch(crc)
+		{
+		case 0xa39517ab: // ffx pal/eu
+		case 0xa39517ae: // ffx pal/fr
+		case 0x941bb7d9: // ffx pal/de
+		case 0xa39517a9: // ffx pal/it
+		case 0x941bb7de: // ffx pal/es
+		case 0xbb3d833a: // ffx ntsc/us
+		case 0x6a4efe60: // ffx ntsc/j
+		case 0x3866ca7e: // ffx int. ntsc/asia (SLPM-67513, some kind of a asia version) 
+		case 0x658597e2: // ffx int. ntsc/j
+		case 0x9aac5309: // ffx-2 pal/e
+		case 0x9aac530c: // ffx-2 pal/fr
+		case 0x9aac530a: // ffx-2 pal/fr? (maybe belgium or luxembourg version)
+		case 0x9aac530d: // ffx-2 pal/de
+		case 0x9aac530b: // ffx-2 pal/it
+		case 0x48fe0c71: // ffx-2 ntsc/us
+		case 0xe1fd9a2d: // ffx-2 int+lm ntsc/j
+		case 0xf0a6d880: // harvest moon ntsc/us
+			m_nloophack = true;
+			break;
+		}
+	}
+}
+
+void GSRenderer::SetFrameSkip(int frameskip)
+{
+	// Nothing happens unless the value actually changes.
+	if(m_frameskip == frameskip)
+	{
+		return;
+	}
+
+	m_frameskip = frameskip;
+
+	// The original if(frameskip) / else pair had two empty bodies, so there is
+	// currently no further reaction to turning frame skipping on or off.
+	// The new value is only stored.
+}
+
+// TODO
+
+void GSRenderer::FinishFlip(FlipInfo src[2])
+{
+	CSize fs(0, 0); // frame size: GetFrameSize() scaled by the source's scale
+	CSize ds(0, 0); // display size: frame size, with the height doubled when SMODE2->INT && SMODE2->FFMD
+	// both sizes are taken from the sources that have a texture; two sources are expected to agree (height may differ by 1)
+	for(int i = 0; i < 2; i++)
+	{
+		if(src[i].t)
+		{
+			CSize s = GetFrameSize(i);
+
+			s.cx = (int)(src[i].s.x * s.cx);
+			s.cy = (int)(src[i].s.y * s.cy);
+
+			ASSERT(fs.cx == 0 || fs.cx == s.cx);
+			ASSERT(fs.cy == 0 || fs.cy == s.cy || fs.cy + 1 == s.cy);
+
+			fs.cx = s.cx;
+			fs.cy = s.cy;
+
+			if(SMODE2->INT && SMODE2->FFMD) s.cy *= 2;
+
+			ASSERT(ds.cx == 0 || ds.cx == s.cx);
+			ASSERT(ds.cy == 0 || ds.cy == s.cy || ds.cy + 1 == s.cy);
+
+			ds.cx = s.cx;
+			ds.cy = s.cy;
+		}
+	}
+	// no source with a texture (or a zero-sized frame): m_tex_current is left as it is
+	if(fs.cx == 0 || fs.cy == 0)
+	{
+		return;
+	}
+
+	// merge
+	// the merge target is only created again when the frame size changes
+	if(!m_dev.m_tex_merge || m_dev.m_tex_merge.m_desc.Width != fs.cx || m_dev.m_tex_merge.m_desc.Height != fs.cy)
+	{
+		m_dev.CreateRenderTarget(m_dev.m_tex_merge, fs.cx, fs.cy);
+	}
+
+	Merge(src, m_dev.m_tex_merge);
+
+	ID3D10Texture2D* current = m_dev.m_tex_merge;
+	// m_interlace 0 = off; otherwise bit 0 of (m_interlace - 1) selects the field and the remaining bits the mode
+	if(SMODE2->INT && m_interlace > 0)
+	{
+		int field = 1 - ((m_interlace - 1) & 1);
+		int mode = (m_interlace - 1) >> 1;
+
+		current = m_dev.Interlace(m_dev.m_tex_merge, ds, m_field ^ field, mode, src[1].s.y);
+		// NULL from Interlace() means there is no new frame this time; the previous m_tex_current is kept
+		if(!current) return;
+	}
+
+	m_dev.m_tex_current = current;
+}
+
+void GSRenderer::Merge(FlipInfo src[2], GSTexture2D& dst)
+{
+	// om
+
+	m_dev.OMSetRenderTargets(dst, NULL);
+
+	m_dev.OMSet(m_dev.m_convert.dss, 0, m_dev.m_convert.bs, 0);
+
+	// ia
+
+	CRect r[2];
+	
+	r[0] = GetFrameRect(0);
+	r[1] = GetFrameRect(1);
+	// full-target quad; each vertex carries the frame rect corner of both sources, scaled by src[i].s and divided by the texture size
+	// NOTE(review): the divisions also run when src[i].t is empty - confirm m_desc is harmless in that case
+	VertexPT2 vertices[] =
+	{
+		{-1, +1, 0.5f, 1.0f, 
+			src[0].s.x * r[0].left / src[0].t.m_desc.Width, src[0].s.y * r[0].top / src[0].t.m_desc.Height,
+			src[1].s.x * r[1].left / src[1].t.m_desc.Width, src[1].s.y * r[1].top / src[1].t.m_desc.Height},
+		{+1, +1, 0.5f, 1.0f, 
+			src[0].s.x * r[0].right / src[0].t.m_desc.Width, src[0].s.y * r[0].top / src[0].t.m_desc.Height,
+			src[1].s.x * r[1].right / src[1].t.m_desc.Width, src[1].s.y * r[1].top / src[1].t.m_desc.Height},
+		{-1, -1, 0.5f, 1.0f, 
+			src[0].s.x * r[0].left / src[0].t.m_desc.Width, src[0].s.y * r[0].bottom / src[0].t.m_desc.Height,
+			src[1].s.x * r[1].left / src[1].t.m_desc.Width, src[1].s.y * r[1].bottom / src[1].t.m_desc.Height}, 
+		{+1, -1, 0.5f, 1.0f,
+			src[0].s.x * r[0].right / src[0].t.m_desc.Width, src[0].s.y * r[0].bottom / src[0].t.m_desc.Height,
+			src[1].s.x * r[1].right / src[1].t.m_desc.Width, src[1].s.y * r[1].bottom / src[1].t.m_desc.Height}, 
+	};
+
+	m_dev.IASet(m_dev.m_merge.vb, 4, vertices, m_dev.m_merge.il, D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+	// vs
+
+	m_dev.VSSet(m_dev.m_merge.vs, NULL);
+
+	// gs
+
+	m_dev.GSSet(NULL);
+
+	// ps
+	
+	MergeCB* cb = NULL;
+	// the constants are only rewritten when the buffer can be mapped; Padding is left untouched
+	if(SUCCEEDED(m_dev.m_merge.cb->Map(D3D10_MAP_WRITE_DISCARD, NULL, (void**)&cb)))
+	{
+		cb->BGColor.x = (float)BGCOLOR->R / 255;
+		cb->BGColor.y = (float)BGCOLOR->G / 255;
+		cb->BGColor.z = (float)BGCOLOR->B / 255;
+		cb->BGColor.w = 0;
+		cb->Alpha = (float)PMODE->ALP / 255;
+		cb->EN1 = (float)IsEnabled(0);
+		cb->EN2 = (float)IsEnabled(1);
+		cb->MMOD = !!PMODE->MMOD;
+		cb->SLBG = !!PMODE->SLBG;
+
+		m_dev.m_merge.cb->Unmap();
+	}
+	
+	m_dev->PSSetConstantBuffers(0, 1, &m_dev.m_merge.cb.p);
+	// a source without a texture is replaced by the device's 1x1 texture
+	m_dev.PSSetShaderResources(src[0].t ? src[0].t : m_dev.m_tex_1x1, src[1].t ? src[1].t : m_dev.m_tex_1x1);
+
+	m_dev.PSSet(m_dev.m_merge.ps, m_dev.m_ss_linear);
+
+	// rs
+
+	m_dev.RSSet(dst.m_desc.Width, dst.m_desc.Height);
+
+	// draw the quad, then unbind the sources and the target
+
+	m_dev->Draw(4, 0);
+
+	m_dev.EndScene();
+}
+
+void GSRenderer::Present()
+{
+	m_perfmon.Put(GSPerfMon::Frame);
+
+	HRESULT hr;
+
+	CRect cr;
+
+	GetClientRect(&cr);
+
+	D3D10_TEXTURE2D_DESC desc;
+
+	memset(&desc, 0, sizeof(desc));
+	// NOTE(review): m_backbuffer is dereferenced unchecked; ResetDevice() leaves it NULL if GetBuffer() fails
+	m_dev.m_backbuffer->GetDesc(&desc);
+	// follow window size changes by resizing the swap chain to the client area
+	if(desc.Width != cr.Width() || desc.Height != cr.Height())
+	{
+		// TODO: ResetDevice();
+
+		m_dev.ResetDevice(cr.Width(), cr.Height());
+	}
+
+	CComPtr<ID3D10RenderTargetView> rtv;
+
+	hr = m_dev->CreateRenderTargetView(m_dev.m_backbuffer, NULL, &rtv.p);
+
+	float color[4] = {0, 0, 0, 0};
+	// clear the whole back buffer; the frame may only cover part of it
+	m_dev->ClearRenderTargetView(rtv, color);
+
+	if(m_dev.m_tex_current)
+	{
+		static int ar[][2] = {{0, 0}, {4, 3}, {16, 9}}; // indexed by m_aspectratio; {0, 0} = fill the client area
+
+		int arx = ar[m_aspectratio][0];
+		int ary = ar[m_aspectratio][1];
+
+		CRect r = cr;
+		// fit the largest arx:ary rectangle into the client area, centered, with an even left/top coordinate
+		if(arx > 0 && ary > 0)
+		{
+			if(r.Width() * ary > r.Height() * arx)
+			{
+				int w = r.Height() * arx / ary;
+				r.left = r.CenterPoint().x - w / 2;
+				if(r.left & 1) r.left++;
+				r.right = r.left + w;
+			}
+			else
+			{
+				int h = r.Width() * ary / arx;
+				r.top = r.CenterPoint().y - h / 2;
+				if(r.top & 1) r.top++;
+				r.bottom = r.top + h;
+			}
+		}
+
+		r &= cr;
+
+		GSTexture2D st(m_dev.m_tex_current);
+		GSTexture2D dt(m_dev.m_backbuffer);
+		D3DXVECTOR4 dr(r.left, r.top, r.right, r.bottom);
+
+		m_dev.StretchRect(st, dt, dr);
+	}
+
+	// osd
+
+	static UINT64 s_frame = 0;
+	static CString s_stats;
+	// the statistics are refreshed every 30 frames
+	if(m_perfmon.GetFrame() - s_frame >= 30)
+	{
+		m_perfmon.Update();
+
+		s_frame = m_perfmon.GetFrame();
+
+		double fps = 1000.0f / m_perfmon.Get(GSPerfMon::Frame);
+		
+		s_stats.Format(
+			_T("%I64d | %d x %d | %.2f fps (%d%%) | %s - %s | %s | %d/%d | %d%% CPU | %.2f | %.2f/%.2f | %.2f"), 
+			m_perfmon.GetFrame(), GetDisplaySize().cx, GetDisplaySize().cy, fps, (int)(100.0 * fps / GetFPS()),
+			SMODE2->INT ? (CString(_T("Interlaced ")) + (SMODE2->FFMD ? _T("(frame)") : _T("(field)"))) : _T("Progressive"),
+			g_interlace[m_interlace].name,
+			g_aspectratio[m_aspectratio].name,
+			(int)m_perfmon.Get(GSPerfMon::Prim),
+			(int)m_perfmon.Get(GSPerfMon::Draw),
+			m_perfmon.CPU(),
+			m_perfmon.Get(GSPerfMon::Swizzle) / 1024,
+			m_perfmon.Get(GSPerfMon::Unswizzle) / 1024,
+			m_perfmon.Get(GSPerfMon::Unswizzle2) / 1024,
+			m_perfmon.Get(GSPerfMon::Texture) / 1024
+			);
+
+		if(m_osd) // && m_d3dpp.Windowed
+		{
+			SetWindowText(s_stats);
+		}
+
+		if(m_perfmon.Get(GSPerfMon::COLCLAMP)) _tprintf(_T("*** NOT SUPPORTED: color wrap ***\n"));
+		if(m_perfmon.Get(GSPerfMon::PABE)) _tprintf(_T("*** NOT SUPPORTED: per pixel alpha blend ***\n"));
+		if(m_perfmon.Get(GSPerfMon::DATE)) _tprintf(_T("*** PERFORMANCE WARNING: destination alpha test used ***\n"));
+		if(m_perfmon.Get(GSPerfMon::ABE)) _tprintf(_T("*** NOT SUPPORTED: alpha blending mode ***\n"));
+		if(m_perfmon.Get(GSPerfMon::DepthTexture)) _tprintf(_T("*** NOT SUPPORTED: depth texture ***\n"));
+	}
+
+/*
+	if(m_osd && !m_d3dpp.Windowed)
+	{
+		hr = m_dev->BeginScene();
+
+		hr = m_dev->SetRenderTarget(0, pBackBuffer);
+		hr = m_dev->SetDepthStencilSurface(NULL);
+
+		CRect r;
+		
+		GetClientRect(r);
+
+		D3DCOLOR c = D3DCOLOR_ARGB(255, 0, 255, 0);
+
+		CString str = s_stats;
+
+		str += _T("\n\nF5: interlace mode\nF6: aspect ratio\nF7: OSD");
+
+		if(m_pD3DXFont->DrawText(NULL, str, -1, &r, DT_CALCRECT|DT_LEFT|DT_WORDBREAK, c))
+		{
+			m_pD3DXFont->DrawText(NULL, str, -1, &r, DT_LEFT|DT_WORDBREAK, c);
+		}
+
+		hr = m_dev->EndScene();
+	}
+*/
+	m_dev.Present();
+}
\ No newline at end of file
diff --git a/gsdx10/GSRenderer.h b/gsdx10/GSRenderer.h
new file mode 100644
index 0000000..79f1ead
--- /dev/null
+++ b/gsdx10/GSRenderer.h
@@ -0,0 +1,150 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSDevice.h"
+
+class GSRenderer : public CWnd, public GSState // common base of the renderer back-ends (see GSRendererT below)
+{
+	DECLARE_MESSAGE_MAP()
+
+protected:
+	int m_interlace; // index into g_interlace[] (its name is shown in the stats string)
+	int m_aspectratio; // index into g_aspectratio[] (its name is shown in the stats string)
+	int m_filter; // sampler filtering: 2 = take TEX1.MMIN/MMAG per draw, any other value is used directly as min/mag
+	int m_options;
+	bool m_vsync;
+	bool m_osd; // when set, the stats string is written to the window title
+	int m_field;
+	int m_crc; // game CRC, used by the HW renderer to pick per-game workarounds
+	int m_frameskip;
+
+	GSPerfMon m_perfmon; // frame counter and performance statistics
+
+public:
+	GSRenderer(BYTE* base, bool mt, void (*irq)(), bool nloophack);
+	virtual ~GSRenderer();
+
+	virtual bool Create(LPCTSTR title); // overridden by GSRendererHW, which calls this first
+
+	void Show();
+	void Hide();
+
+	void OnClose();
+
+	void VSync(int field);
+	bool MakeSnapshot(char* path);
+	void SetGameCRC(int crc, int options);
+	void SetFrameSkip(int frameskip);
+
+	// TODO (members below are public for now)
+
+	GSDevice m_dev; // device wrapper used for all drawing and for Present()
+
+	struct FlipInfo // one display circuit's output as filled in by Flip()
+	{
+		GSTexture2D t; // source texture
+		GSScale s; // scale of that texture
+	};
+
+	virtual void Flip() = 0; // back-end specific: collect the output textures and call FinishFlip()
+
+	void FinishFlip(FlipInfo src[2]);
+	void Merge(FlipInfo src[2], GSTexture2D& dst);
+	void Present();
+};
+
+template <class Vertex> class GSRendererT : public GSRenderer // adds vertex queueing for a back-end specific Vertex type
+{
+protected:
+	Vertex* m_vertices; // 16-byte aligned output buffer, grown on demand by VertexKick()
+	int m_count; // number of vertices currently stored in m_vertices
+	int m_maxcount; // capacity of m_vertices, in vertices
+	GSVertexList<Vertex> m_vl; // vertices received so far that do not form a complete primitive yet
+
+	void Reset()
+	{
+		m_count = 0;
+		m_vl.RemoveAll();
+
+		__super::Reset();
+	}
+
+	void VertexKick(bool skip)
+	{
+		while(m_vl.GetCount() >= primVertexCount[PRIM->PRIM]) // enough queued vertices for one primitive
+		{
+			if(m_count + 6 > m_maxcount) // keep headroom for the vertices DrawingKick() is about to write
+			{
+				m_maxcount = max(10000, m_maxcount * 3/2);
+
+				Vertex* vertices = (Vertex*)_aligned_malloc(sizeof(Vertex) * m_maxcount, 16);
+
+				if(m_vertices)
+				{
+					memcpy(vertices, m_vertices, sizeof(Vertex) * m_count);
+					_aligned_free(m_vertices);
+				}
+
+				m_vertices = vertices;
+			}
+
+			DrawingKick(skip); // must consume vertices from m_vl, otherwise this loop never ends
+		}
+	}
+
+	virtual void DrawingKick(bool skip) = 0; // assemble one primitive from m_vl into m_vertices[m_count...]
+
+	void ResetPrim()
+	{
+		m_vl.RemoveAll();
+	}
+
+	void FlushPrim() 
+	{
+		if(m_count > 0)
+		{
+			Draw();
+
+			m_count = 0;
+		}
+	}
+
+	virtual void Draw() = 0; // render the m_count vertices collected in m_vertices
+
+public:
+	GSRendererT(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+		: GSRenderer(base, mt, irq, nloophack)
+		, m_vertices(NULL)
+		, m_count(0) // was left uninitialized, although VertexKick() reads it before any Reset() is guaranteed
+		, m_maxcount(0)
+	{
+	}
+
+	virtual ~GSRendererT()
+	{
+		if(m_vertices)
+		{
+			_aligned_free(m_vertices);
+		}
+	}
+};
\ No newline at end of file
diff --git a/gsdx10/GSRendererHW.cpp b/gsdx10/GSRendererHW.cpp
new file mode 100644
index 0000000..77ce45e
--- /dev/null
+++ b/gsdx10/GSRendererHW.cpp
@@ -0,0 +1,1100 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSRendererHW.h"
+#include "resource.h"
+
+GSRendererHW::GSRendererHW(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+	: GSRendererT(base, mt, irq, nloophack)
+	, m_tc(this)
+	, m_width(1024)
+	, m_height(1024)
+	, m_skip(0)
+{
+	// the 1024x1024 default stays in effect when "nativeres" is set; otherwise resx / resy are read
+	CWinApp* app = AfxGetApp();
+	if(app->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE)) return;
+	m_width = app->GetProfileInt(_T("Settings"), _T("resx"), 1024);
+	m_height = app->GetProfileInt(_T("Settings"), _T("resy"), 1024);
+}
+
+bool GSRendererHW::Create(LPCTSTR title)
+{
+	// Creates the base renderer, the texture-fx helper and the state objects used by SetupDATE(); false on any failure.
+	if(!__super::Create(title))
+		return false;
+
+	if(!m_tfx.Create(&m_dev))
+		return false;
+
+	D3D10_DEPTH_STENCIL_DESC dsd;
+	memset(&dsd, 0, sizeof(dsd));
+
+	// no depth test; the stencil test always passes and replaces bit 0 (masks == 1) on both faces
+	dsd.DepthEnable = false;
+	dsd.StencilEnable = true;
+	dsd.StencilReadMask = 1;
+	dsd.StencilWriteMask = 1;
+	dsd.FrontFace.StencilFunc = D3D10_COMPARISON_ALWAYS;
+	dsd.FrontFace.StencilPassOp = D3D10_STENCIL_OP_REPLACE;
+	dsd.FrontFace.StencilFailOp = D3D10_STENCIL_OP_KEEP;
+	dsd.FrontFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP;
+	dsd.BackFace.StencilFunc = D3D10_COMPARISON_ALWAYS;
+	dsd.BackFace.StencilPassOp = D3D10_STENCIL_OP_REPLACE;
+	dsd.BackFace.StencilFailOp = D3D10_STENCIL_OP_KEEP;
+	dsd.BackFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP;
+
+	if(FAILED(m_dev->CreateDepthStencilState(&dsd, &m_date.dss))) return false; // result used to be ignored
+
+	D3D10_BLEND_DESC bd;
+	memset(&bd, 0, sizeof(bd)); // all-zero desc: blending off and RenderTargetWriteMask == 0
+
+	if(FAILED(m_dev->CreateBlendState(&bd, &m_date.bs))) return false; // result used to be ignored
+
+	return true;
+}
+
+void GSRendererHW::VertexKick(bool skip)
+{
+	// Appends the current vertex registers (m_v) to the pending list, then lets the base class
+	// assemble any primitive that has become complete.
+	GSVertexHW& vtx = m_vl.AddTail();
+
+	// position, color and fog are copied regardless of the texturing mode
+	vtx.x = (float)m_v.XYZ.X;
+	vtx.y = (float)m_v.XYZ.Y;
+	vtx.z = (float)m_v.XYZ.Z;
+	vtx.c = m_v.RGBAQ.ai32[0];
+	vtx.f = m_v.FOG.ai32[1];
+
+	if(!PRIM->TME)
+	{
+		// texturing off: w = 1 and zero texture coordinates
+		vtx.w = 1.0f;
+		vtx.u = 0;
+		vtx.v = 0;
+	}
+	else if(PRIM->FST)
+	{
+		// coordinates come from the UV register, w is fixed at 1
+		vtx.w = 1.0f;
+		vtx.u = (float)(int)m_v.UV.U;
+		vtx.v = (float)(int)m_v.UV.V;
+	}
+	else
+	{
+		// coordinates come from the ST register, with Q as w
+		vtx.w = m_v.RGBAQ.Q;
+		vtx.u = m_v.ST.S;
+		vtx.v = m_v.ST.T;
+	}
+	__super::VertexKick(skip);
+}
+
+void GSRendererHW::DrawingKick(bool skip) // moves one primitive from m_vl to m_vertices[m_count...], unless skipped or culled
+{
+	GSVertexHW* v = &m_vertices[m_count];
+	int nv = 0; // number of vertices written to v[]
+
+	switch(PRIM->PRIM)
+	{
+	case GS_POINTLIST:
+		m_vl.RemoveAt(0, v[0]);
+		nv = 1;
+		break;
+	case GS_LINELIST:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.RemoveAt(0, v[1]);
+		nv = 2;
+		break;
+	case GS_LINESTRIP: // the second vertex stays queued as the start of the next segment
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.GetAt(0, v[1]);
+		nv = 2;
+		break;
+	case GS_TRIANGLELIST:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.RemoveAt(0, v[1]);
+		m_vl.RemoveAt(0, v[2]);
+		nv = 3;
+		break;
+	case GS_TRIANGLESTRIP: // only the oldest vertex is dropped, the other two are reused
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.GetAt(0, v[1]);
+		m_vl.GetAt(1, v[2]);
+		nv = 3;
+		break;
+	case GS_TRIANGLEFAN: // vertex 0 stays queued; the vertex at index 1 is dropped
+		m_vl.GetAt(0, v[0]);
+		m_vl.RemoveAt(1, v[1]);
+		m_vl.GetAt(1, v[2]);
+		nv = 3;
+		break;
+	case GS_SPRITE: // stored as two vertices
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.RemoveAt(0, v[1]);
+		nv = 2;
+		break;
+	default:
+		m_vl.RemoveAll(); // was commented out: with ASSERT compiled out nothing was consumed and the caller's loop could spin forever
+		ASSERT(0);
+		return;
+	}
+
+	if(skip)
+	{
+		return; // vertices were consumed above, but nothing is added to the draw buffer
+	}
+
+	float sx0 = m_context->scissor.x0;
+	float sy0 = m_context->scissor.y0;
+	float sx1 = m_context->scissor.x1;
+	float sy1 = m_context->scissor.y1;
+
+	switch(nv) // drop the primitive when all of its vertices lie beyond the same scissor edge
+	{
+	case 1:
+		if(v[0].x < sx0
+		|| v[0].x > sx1
+		|| v[0].y < sy0
+		|| v[0].y > sy1)
+			return;
+		break;
+	case 2:
+		if(v[0].x < sx0 && v[1].x < sx0
+		|| v[0].x > sx1 && v[1].x > sx1
+		|| v[0].y < sy0 && v[1].y < sy0
+		|| v[0].y > sy1 && v[1].y > sy1)
+			return;
+		break;
+	case 3:
+		if(v[0].x < sx0 && v[1].x < sx0 && v[2].x < sx0
+		|| v[0].x > sx1 && v[1].x > sx1 && v[2].x > sx1
+		|| v[0].y < sy0 && v[1].y < sy0 && v[2].y < sy0
+		|| v[0].y > sy1 && v[1].y > sy1 && v[2].y > sy1)
+			return;
+		break;
+	default:
+		__assume(0);
+	}
+
+	m_count += nv; // commit the vertices written to v[]
+
+	// costs a few fps, but fixes RR's shadows (or anything which paints overlapping shapes with date)
+/*
+	if(m_context->TEST.DATE)
+	{
+		Flush();
+	}
+*/
+}
+/*
+int s_n = 0;
+bool s_dump = false;
+bool s_save = false;
+bool s_savez = false;
+*/
+
+void GSRendererHW::Draw() // renders the m_count vertices queued in m_vertices using the current context registers
+{
+/*
+TRACE(_T("[%d] FlushPrim f %05x (%d) z %05x (%d %d %d %d) t %05x %05x (%d)\n"), 
+	  (int)m_perfmon.GetFrame(), 
+	  (int)m_context->FRAME.Block(), 
+	  (int)m_context->FRAME.PSM, 
+	  (int)m_context->ZBUF.Block(), 
+	  (int)m_context->ZBUF.PSM, 
+	  m_context->TEST.ZTE, 
+	  m_context->TEST.ZTST, 
+	  m_context->ZBUF.ZMSK, 
+	  PRIM->TME ? (int)m_context->TEX0.TBP0 : 0xfffff, 
+	  PRIM->TME && m_context->TEX0.PSM > PSM_PSMCT16S ? (int)m_context->TEX0.CBP : 0xfffff, 
+	  PRIM->TME ? (int)m_context->TEX0.PSM : 0xff);
+*/
+	//
+
+	if(DetectBadFrame()) // heuristics decided that this draw must be skipped
+	{
+		return;
+	}
+
+/*
+if(s_n >= 4653)
+{
+	s_save = true;
+}
+*/
+	//
+
+	GIFRegTEX0 TEX0; // only TBP0, TBW and PSM are filled in before each cache lookup
+
+	// rt (render target described by the FRAME register)
+
+	TEX0.TBP0 = m_context->FRAME.Block();
+	TEX0.TBW = m_context->FRAME.FBW;
+	TEX0.PSM = m_context->FRAME.PSM;
+
+	GSTextureCache::GSRenderTarget* rt = m_tc.GetRenderTarget(TEX0, m_width, m_height);
+
+	// ds (depth buffer described by ZBUF; the width is taken from FRAME.FBW)
+
+	TEX0.TBP0 = m_context->ZBUF.Block();
+	TEX0.TBW = m_context->FRAME.FBW;
+	TEX0.PSM = m_context->ZBUF.PSM;
+
+	GSTextureCache::GSDepthStencil* ds = m_tc.GetDepthStencil(TEX0, m_width, m_height);
+
+	// tex (stays NULL when texture mapping is off)
+
+	GSTextureCache::GSTexture* tex = NULL;
+
+	if(PRIM->TME)
+	{
+		tex = m_tc.GetTexture();
+
+		if(!tex) return; // texture could not be obtained: drop the draw
+	}
+/*
+if(s_dump)
+{
+	CString str;
+	str.Format(_T("c:\\temp2\\_%05d_f%I64d_tex_%05x_%d.dds"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
+	if(PRIM->TME) if(s_save) D3DX10SaveTextureToFile(tex->m_texture, D3DX10_IFF_DDS, str);
+	str.Format(_T("c:\\temp2\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
+	if(s_save) D3DX10SaveTextureToFile(rt->m_texture, D3DX10_IFF_BMP, str);
+	str.Format(_T("c:\\temp2\\_%05d_f%I64d_rz0_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
+	if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str);
+}
+*/
+	//
+
+	int prim = PRIM->PRIM;
+
+	if(!OverrideInput(prim, tex)) // may rewrite prim / m_count / the texture, or cancel the draw by returning false
+	{
+		return;
+	}
+
+	D3D10_PRIMITIVE_TOPOLOGY topology;
+
+	switch(prim) // strips and fans were already expanded to lists by DrawingKick()
+	{
+	case GS_POINTLIST:
+		topology = D3D10_PRIMITIVE_TOPOLOGY_POINTLIST;
+		// m_perfmon.Put(GSPerfMon::Prim, m_count);
+		break;
+	case GS_LINELIST: 
+	case GS_LINESTRIP:
+	case GS_SPRITE: // sprites are submitted as two-vertex lines (gs_sel.prim = 3 below)
+		topology = D3D10_PRIMITIVE_TOPOLOGY_LINELIST;
+		// m_perfmon.Put(GSPerfMon::Prim, m_count / 2);
+		break;
+	case GS_TRIANGLELIST: 
+	case GS_TRIANGLESTRIP: 
+	case GS_TRIANGLEFAN: 
+		topology = D3D10_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
+		// m_perfmon.Put(GSPerfMon::Prim, m_count / 3);
+		break;
+	default:
+		__assume(0);
+	}
+
+	// m_perfmon.Put(GSPerfMon::Draw, 1);
+
+	// date (does nothing unless TEST.DATE is set)
+
+	SetupDATE(rt, ds);
+
+	// om (depth-stencil and blend selectors)
+
+	GSTextureFX::OMDepthStencilSelector om_dssel;
+
+	om_dssel.zte = m_context->TEST.ZTE;
+	om_dssel.ztst = m_context->TEST.ZTST;
+	om_dssel.zwe = !m_context->ZBUF.ZMSK;
+	om_dssel.date = m_context->TEST.DATE;
+
+	GSTextureFX::OMBlendSelector om_bsel;
+
+	om_bsel.abe = PRIM->ABE || (PRIM->PRIM == 1 || PRIM->PRIM == 2) && PRIM->AA1; // also on for PRIM 1 / 2 when AA1 is set
+	om_bsel.a = m_context->ALPHA.A;
+	om_bsel.b = m_context->ALPHA.B;
+	om_bsel.c = m_context->ALPHA.C;
+	om_bsel.d = m_context->ALPHA.D;
+	om_bsel.wr = (m_context->FRAME.FBMSK & 0x000000ff) != 0x000000ff; // a channel is written unless all 8 of its mask bits are set
+	om_bsel.wg = (m_context->FRAME.FBMSK & 0x0000ff00) != 0x0000ff00;
+	om_bsel.wb = (m_context->FRAME.FBMSK & 0x00ff0000) != 0x00ff0000;
+	om_bsel.wa = (m_context->FRAME.FBMSK & 0xff000000) != 0xff000000;
+
+	float factor = (float)(int)m_context->ALPHA.FIX / 0x80; // FIX scaled so that 0x80 becomes 1.0
+
+	m_tfx.SetupOM(om_dssel, om_bsel, factor, rt->m_texture, ds->m_texture);
+
+	// ia
+
+	m_tfx.SetupIA(m_vertices, m_count, topology);
+
+	// vs
+
+	GSTextureFX::VSConstantBuffer vs_cb;
+
+	float sx = 2.0f * rt->m_scale.x / (rt->m_texture.m_desc.Width * 16); // NOTE(review): the 16 presumably removes 4 fractional coordinate bits - confirm
+	float sy = 2.0f * rt->m_scale.y / (rt->m_texture.m_desc.Height * 16);
+	float ox = (float)(int)m_context->XYOFFSET.OFX;
+	float oy = (float)(int)m_context->XYOFFSET.OFY;
+
+	vs_cb.VertexScale = D3DXVECTOR4(sx, -sy, 1.0f / UINT_MAX, 0);
+	vs_cb.VertexOffset = D3DXVECTOR4(ox * sx + 1, -(oy * sy + 1), 0, -1);
+	vs_cb.TextureScale = D3DXVECTOR2(1.0f, 1.0f);
+
+	if(PRIM->TME && PRIM->FST) // UV coordinates: scaled by 1 / (16 * 2^TW) and 1 / (16 * 2^TH)
+	{
+		vs_cb.TextureScale.x = 1.0f / (16 << m_context->TEX0.TW);
+		vs_cb.TextureScale.y = 1.0f / (16 << m_context->TEX0.TH);
+	}
+
+	m_tfx.SetupVS(&vs_cb);
+
+	// gs
+
+	GSTextureFX::GSSelector gs_sel;
+
+	gs_sel.iip = PRIM->IIP;
+
+	switch(prim) // 0 = point, 1 = line, 2 = triangle, 3 = sprite
+	{
+	case GS_POINTLIST:
+		gs_sel.prim = 0;
+		break;
+	case GS_LINELIST: 
+	case GS_LINESTRIP:
+		gs_sel.prim = 1;
+		break;
+	case GS_TRIANGLELIST: 
+	case GS_TRIANGLESTRIP: 
+	case GS_TRIANGLEFAN: 
+		gs_sel.prim = 2;
+		break;
+	case GS_SPRITE:
+		gs_sel.prim = 3;
+		break;
+	default:
+		__assume(0);
+	}	
+
+	m_tfx.SetupGS(gs_sel);
+
+	// ps
+
+	GSTextureFX::PSSelector ps_sel;
+
+	ps_sel.fst = PRIM->FST;
+	ps_sel.clamp = 0; // set to 1 below when WMS or WMT is 2
+	ps_sel.bpp = 0; // replaced by tex->m_bpp2 below when a texture is bound
+	ps_sel.aem = m_env.TEXA.AEM;
+	ps_sel.tfx = m_context->TEX0.TFX;
+	ps_sel.tcc = m_context->TEX0.TCC;
+	ps_sel.ate = m_context->TEST.ATE;
+	ps_sel.atst = m_context->TEST.ATST;
+	ps_sel.fog = PRIM->FGE;
+	ps_sel.clr1 = om_bsel.abe && om_bsel.a == 1 && om_bsel.b == 2 && om_bsel.d == 1;
+	ps_sel.fba = m_context->FBA.FBA;
+	ps_sel.aout = m_context->FRAME.PSM == PSM_PSMCT16 || m_context->FRAME.PSM == PSM_PSMCT16S ? 1 : 0; // 1 for the two 16-bit frame formats
+
+	GSTextureFX::PSSamplerSelector ps_ssel;
+
+	ps_ssel.min = m_filter == 2 ? (m_context->TEX1.MMIN & 1) : m_filter; // m_filter == 2: use bit 0 of the game's own setting
+	ps_ssel.mag = m_filter == 2 ? (m_context->TEX1.MMAG & 1) : m_filter;
+	ps_ssel.tau = 0;
+	ps_ssel.tav = 0;
+
+	GSTextureFX::PSConstantBuffer ps_cb;
+
+	ps_cb.FogColor = D3DXVECTOR4((float)(int)m_env.FOGCOL.FCR / 255, (float)(int)m_env.FOGCOL.FCG / 255, (float)(int)m_env.FOGCOL.FCB / 255, 0);
+	ps_cb.ClampMin = D3DXVECTOR2(-4096, -4096); // defaults, narrowed below for WMS / WMT == 2
+	ps_cb.ClampMax = D3DXVECTOR2(+4096, +4096);
+	ps_cb.TA0 = (float)(int)m_env.TEXA.TA0 / 255;
+	ps_cb.TA1 = (float)(int)m_env.TEXA.TA1 / 255;
+	ps_cb.AREF = (float)(int)m_context->TEST.AREF / 255;
+
+	if(m_context->TEST.ATST == 2 || m_context->TEST.ATST == 5) // AREF is nudged by slightly less than 1/256 for these tests
+	{
+		ps_cb.AREF -= 0.9f/256;
+	}
+	else if(m_context->TEST.ATST == 3 || m_context->TEST.ATST == 6)
+	{
+		ps_cb.AREF += 0.9f/256;
+	}
+
+	ID3D10ShaderResourceView* tex_view = NULL;
+	ID3D10ShaderResourceView* pal_view = NULL;
+
+	if(tex)
+	{
+		ps_sel.bpp = tex->m_bpp2;
+
+		switch(m_context->CLAMP.WMS) // modes 0 and 3 select tau = 1, modes 1 and 2 select tau = 0
+		{
+		case 0: case 3: ps_ssel.tau = 1; break;
+		case 1: case 2: ps_ssel.tau = 0; break;
+		default: __assume(0);
+		}
+
+		switch(m_context->CLAMP.WMT) // same mapping for the vertical axis
+		{
+		case 0: case 3: ps_ssel.tav = 1; break;
+		case 1: case 2: ps_ssel.tav = 0; break;
+		default: __assume(0);
+		}
+
+		if(m_context->CLAMP.WMS == 2) // clamp range MINU..MAXU, divided by 2^TW
+		{
+			ps_cb.ClampMin.x = (float)(int)m_context->CLAMP.MINU / (1 << m_context->TEX0.TW);
+			ps_cb.ClampMax.x = (float)(int)m_context->CLAMP.MAXU / (1 << m_context->TEX0.TW);
+			ps_sel.clamp = 1;
+		}
+
+		if(m_context->CLAMP.WMT == 2) // clamp range MINV..MAXV, divided by 2^TH
+		{
+			ps_cb.ClampMin.y = (float)(int)m_context->CLAMP.MINV / (1 << m_context->TEX0.TH);
+			ps_cb.ClampMax.y = (float)(int)m_context->CLAMP.MAXV / (1 << m_context->TEX0.TH);
+			ps_sel.clamp = 1;
+		}
+
+		float w = (float)(int)tex->m_texture.m_desc.Width;
+		float h = (float)(int)tex->m_texture.m_desc.Height;
+
+		ps_cb.WH = D3DXVECTOR2(w, h);
+		ps_cb.rWrH = D3DXVECTOR2(1.0f / w, 1.0f / h);
+		ps_cb.rWZ = D3DXVECTOR2(1.0f / w, 0);
+		ps_cb.ZrH = D3DXVECTOR2(0, 1.0f / h);
+
+		tex_view = tex->m_texture;
+		pal_view = tex->m_palette;
+	}
+	else
+	{
+		ps_sel.tfx = 4; // NOTE(review): presumably the shader's "no texture" mode - confirm against the pixel shader
+	}
+
+	m_tfx.SetupPS(ps_sel, &ps_cb, ps_ssel, tex_view, pal_view);
+
+	// rs
+
+	UINT w = rt->m_texture.m_desc.Width;
+	UINT h = rt->m_texture.m_desc.Height;
+
+	CRect scissor( // SCISSOR register scaled to render target pixels; the far edges are exclusive (+ 1)
+		(int)(rt->m_scale.x * (m_context->SCISSOR.SCAX0)),
+		(int)(rt->m_scale.y * (m_context->SCISSOR.SCAY0)), 
+		(int)(rt->m_scale.x * (m_context->SCISSOR.SCAX1 + 1)),
+		(int)(rt->m_scale.y * (m_context->SCISSOR.SCAY1 + 1)));
+
+	scissor &= CRect(0, 0, w, h); // intersect with the render target bounds
+
+	m_tfx.SetupRS(w, h, scissor);
+
+	// draw
+
+	if(!m_context->TEST.ATE || m_context->TEST.ATST != 0) // first pass, skipped only for ATE with ATST == 0
+	{
+		m_dev->Draw(m_count, 0);
+	}
+
+	if(m_context->TEST.ATE && m_context->TEST.ATST != 1 && m_context->TEST.AFAIL) // second pass, only for AFAIL != 0
+	{
+		ASSERT(!m_env.PABE.PABE);
+
+		static const DWORD iatst[] = {1, 0, 5, 6, 7, 2, 3, 4}; // NOTE(review): presumably maps each ATST value to its inverse test - confirm
+
+		ps_sel.atst = iatst[ps_sel.atst];
+
+		m_tfx.UpdatePS(ps_sel, ps_ssel);
+
+		bool z = om_dssel.zwe;
+		bool r = om_bsel.wr;
+		bool g = om_bsel.wg;
+		bool b = om_bsel.wb;
+		bool a = om_bsel.wa;
+
+		switch(m_context->TEST.AFAIL) // each case clears the write enables that must not be updated in this pass
+		{
+		case 0: z = r = g = b = a = false; break; // none
+		case 1: z = false; break; // rgba
+		case 2: r = g = b = a = false; break; // z
+		case 3: z = a = false; break; // rgb
+		default: __assume(0);
+		}
+
+		if(z || r || g || b || a) // nothing left to write: skip the second draw
+		{
+			om_dssel.zwe = z;
+			om_bsel.wr = r;
+			om_bsel.wg = g;
+			om_bsel.wb = b;
+			om_bsel.wa = a;
+
+			m_tfx.UpdateOM(om_dssel, om_bsel, factor);
+
+			m_dev->Draw(m_count, 0);
+		}
+	}
+
+	m_dev.EndScene();
+
+/*
+	if(m_env.COLCLAMP.CLAMP == 0) m_perfmon.Put(GSPerfMon::COLCLAMP);
+	if(m_env.PABE.PABE) m_perfmon.Put(GSPerfMon::PABE);
+	if(m_context->TEST.DATE) m_perfmon.Put(GSPerfMon::DATE);
+	if(om_bsel.abe && om_bsel.a == om_bsel.d && om_bsel.a != om_bsel.b && om_bsel.a != 1 && om_bsel.b != 2) m_perfmon.Put(GSPerfMon::ABE);
+*/
+/*
+if(s_dump)
+{
+	CString str;
+	str.Format(_T("c:\\temp2\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
+	if(s_save) D3DX10SaveTextureToFile(rt->m_texture, D3DX10_IFF_BMP, str);
+	str.Format(_T("c:\\temp2\\_%05d_f%I64d_rz1_%05x_%d.bmp"), s_n-1, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
+	if(s_savez) m_dev.SaveToFileD32S8X24(ds->m_texture, str);
+}
+*/
+}
+
+void GSRendererHW::Flip() // gathers the render targets of the enabled display circuits and hands them to FinishFlip()
+{
+	FlipInfo src[2]; // entries of disabled or unresolved circuits keep their default-constructed value
+
+	for(int i = 0; i < countof(src); i++)
+	{
+		if(!IsEnabled(i))
+		{
+			continue;
+		}
+
+		GIFRegTEX0 TEX0; // only TBP0, TBW and PSM are filled in, from the DISPFB register of circuit i
+
+		TEX0.TBP0 = DISPFB[i]->Block();
+		TEX0.TBW = DISPFB[i]->FBW;
+		TEX0.PSM = DISPFB[i]->PSM;
+
+		if(GSTextureCache::GSRenderTarget* rt = m_tc.GetRenderTarget(TEX0, m_width, m_height, true)) // NOTE(review): meaning of the trailing "true" is not visible here - confirm
+		{
+			src[i].t = rt->m_texture;
+			src[i].s = rt->m_scale;
+/*
+if(s_dump)
+{
+	CString str;
+	str.Format(_T("c:\\temp2\\_%05d_f%I64d_fr%d_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), i, (int)TEX0.TBP0, (int)TEX0.PSM);
+	if(s_save) ::D3DX10SaveTextureToFile(rt->m_texture, D3DX10_IFF_BMP, str);
+}
+*/
+//s_dump = m_perfmon.GetFrame() >= 5000;
+//if(m_perfmon.GetFrame() == 5000) m_tc.RemoveAll();
+		}
+	}
+
+	FinishFlip(src);
+
+	m_tc.IncAge();
+
+	m_skip = 0; // any draw-skip count set by DetectBadFrame() ends with the frame
+}
+
+void GSRendererHW::InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) // forwards the rectangle to the texture cache
+{
+	//TRACE(_T("[%d] InvalidateTexture %d,%d - %d,%d %05x\n"), (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.DBP);
+
+	m_tc.InvalidateTexture(BITBLTBUF, &r); // r is a by-value copy, so passing its address is safe
+}
+
+void GSRendererHW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r) // forwards the rectangle to the texture cache
+{
+	//TRACE(_T("[%d] InvalidateLocalMem %d,%d - %d,%d %05x\n"), (int)m_perfmon.GetFrame(), r.left, r.top, r.right, r.bottom, (int)BITBLTBUF.SBP);
+
+	m_tc.InvalidateLocalMem(BITBLTBUF, &r); // r is a by-value copy, so passing its address is safe
+}
+
+void GSRendererHW::MinMaxUV(int w, int h, CRect& r) // sets r to the part of a w x h texture covered by the queued vertices' texture coordinates
+{
+	r.SetRect(0, 0, w, h); // default: the whole texture
+
+	if(m_count > 100) // large batches are not scanned; the full rect is kept
+	{
+		return;
+	}
+
+	if(m_context->CLAMP.WMS < 3 || m_context->CLAMP.WMT < 3) // an axis in mode 3 keeps its full extent
+	{
+		uvmm_t uv;
+
+		uv.umin = uv.vmin = 0;
+		uv.umax = uv.vmax = 1;
+
+		if(PRIM->FST)
+		{
+			UVMinMax(m_count, (vertex_t*)m_vertices, &uv);
+
+			uv.umin *= 1.0f / (16 << m_context->TEX0.TW); // same 1 / (16 * 2^TW) scaling as the vertex shader's TextureScale in Draw()
+			uv.umax *= 1.0f / (16 << m_context->TEX0.TW);
+			uv.vmin *= 1.0f / (16 << m_context->TEX0.TH);
+			uv.vmax *= 1.0f / (16 << m_context->TEX0.TH);
+		}
+		else
+		{
+			// FIXME
+
+			if(m_count > 0)// && m_count < 100)
+			{
+				uv.umin = uv.vmin = +1e10;
+				uv.umax = uv.vmax = -1e10;
+
+				for(int i = 0, j = m_count; i < j; i++)
+				{
+					float w = 1.0f / m_vertices[i].w; // NOTE: this local w (reciprocal of the vertex w) shadows the width parameter inside the loop
+					float u = m_vertices[i].u * w;
+					if(uv.umax < u) uv.umax = u;
+					if(uv.umin > u) uv.umin = u;
+					float v = m_vertices[i].v * w;
+					if(uv.vmax < v) uv.vmax = v;
+					if(uv.vmin > v) uv.vmin = v;
+				}
+			}
+		}
+
+		CSize bs = GSLocalMemory::m_psm[m_context->TEX0.PSM].bs; // NOTE(review): presumably the block size of the texture format - confirm
+
+		CSize bsm(bs.cx-1, bs.cy-1); // used as alignment masks below, which assumes bs is a power of two
+
+		if(m_context->CLAMP.WMS < 3)
+		{
+			if(m_context->CLAMP.WMS == 0)
+			{
+				float fmin = floor(uv.umin);
+				float fmax = floor(uv.umax);
+
+				if(fmin != fmax) {uv.umin = 0; uv.umax = 1.0f;} // range crosses an integer boundary: take the whole axis
+				else {uv.umin -= fmin; uv.umax -= fmax;} // otherwise keep only the fractional parts
+
+				// FIXME: 
+				if(uv.umin == 0 && uv.umax != 1.0f) uv.umax = 1.0f;
+			}
+			else if(m_context->CLAMP.WMS == 1) // limit both ends to [0, 1]
+			{
+				if(uv.umin < 0) uv.umin = 0;
+				else if(uv.umin > 1.0f) uv.umin = 1.0f;
+				if(uv.umax < 0) uv.umax = 0;
+				else if(uv.umax > 1.0f) uv.umax = 1.0f;
+				if(uv.umin > uv.umax) uv.umin = uv.umax;
+			}
+			else if(m_context->CLAMP.WMS == 2) // limit both ends to [MINU, MAXU] / w
+			{
+				float minu = 1.0f * m_context->CLAMP.MINU / w;
+				float maxu = 1.0f * m_context->CLAMP.MAXU / w;
+				if(uv.umin < minu) uv.umin = minu;
+				else if(uv.umin > maxu) uv.umin = maxu;
+				if(uv.umax < minu) uv.umax = minu;
+				else if(uv.umax > maxu) uv.umax = maxu;
+				if(uv.umin > uv.umax) uv.umin = uv.umax;
+			}
+
+			r.left = max((int)(uv.umin * w) & ~bsm.cx, 0); // aligned down to a multiple of bs.cx
+			r.right = min(((int)(uv.umax * w) + bsm.cx + 1) & ~bsm.cx, w); // aligned up, limited to the texture width
+		}
+
+		if(m_context->CLAMP.WMT < 3) // same steps for the vertical axis
+		{
+			if(m_context->CLAMP.WMT == 0)
+			{
+				float fmin = floor(uv.vmin);
+				float fmax = floor(uv.vmax);
+
+				if(fmin != fmax) {uv.vmin = 0; uv.vmax = 1.0f;}
+				else {uv.vmin -= fmin; uv.vmax -= fmax;}
+
+				// FIXME: 
+				if(uv.vmin == 0 && uv.vmax != 1.0f) uv.vmax = 1.0f;
+			}
+			else if(m_context->CLAMP.WMT == 1)
+			{
+				if(uv.vmin < 0) uv.vmin = 0;
+				else if(uv.vmin > 1.0f) uv.vmin = 1.0f;
+				if(uv.vmax < 0) uv.vmax = 0;
+				else if(uv.vmax > 1.0f) uv.vmax = 1.0f;
+				if(uv.vmin > uv.vmax) uv.vmin = uv.vmax;
+			}
+			else if(m_context->CLAMP.WMT == 2)
+			{
+				float minv = 1.0f * m_context->CLAMP.MINV / h;
+				float maxv = 1.0f * m_context->CLAMP.MAXV / h;
+				if(uv.vmin < minv) uv.vmin = minv;
+				else if(uv.vmin > maxv) uv.vmin = maxv;
+				if(uv.vmax < minv) uv.vmax = minv;
+				else if(uv.vmax > maxv) uv.vmax = maxv;
+				if(uv.vmin > uv.vmax) uv.vmin = uv.vmax;
+			}
+			
+			r.top = max((int)(uv.vmin * h) & ~bsm.cy, 0);
+			r.bottom = min(((int)(uv.vmax * h) + bsm.cy + 1) & ~bsm.cy, h);
+		}
+	}
+	//ASSERT(r.left <= r.right);
+	//ASSERT(r.top <= r.bottom);
+}
+
+void GSRendererHW::SetupDATE(GSTextureCache::GSRenderTarget* rt, GSTextureCache::GSDepthStencil* ds) // stencil pre-pass for destination alpha testing
+{
+	if(!m_context->TEST.DATE) return; // || (::GetAsyncKeyState(VK_CONTROL)&0x80000000)
+
+	// sfex3 (after the capcom logo), vf4 (first menu fading in), ffxii shadows, rumble roses shadows
+
+	float xmin = -1, xmax = +1;
+	float ymin = -1, ymax = +1;
+
+	float umin = 0, umax = 1;
+	float vmin = 0, vmax = 1;
+
+	// if(m_count < 1000) {
+
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+		
+	__m128 xymin = _mm_set1_ps(+1e10); // SSE path: min / max over the first 128-bit lane group of every vertex
+	__m128 xymax = _mm_set1_ps(-1e10);
+
+	for(int i = 0, j = m_count; i < j; i++)
+	{
+		xymin = _mm_min_ps(m_vertices[i].m128[0], xymin);
+		xymax = _mm_max_ps(m_vertices[i].m128[0], xymax);
+	}
+
+	xmin = xymin.m128_f32[0]; // lanes 0 and 1 are read as x and y
+	ymin = xymin.m128_f32[1];
+	xmax = xymax.m128_f32[0];
+	ymax = xymax.m128_f32[1];
+
+#else	
+
+	xmin = ymin = +1e10; // scalar path: same bounding box computed from x and y
+	xmax = ymax = -1e10;
+
+	for(int i = 0, j = m_count; i < j; i++)
+	{
+		float x = m_vertices[i].x;
+
+		if(x < xmin) xmin = x;
+		if(x > xmax) xmax = x;
+		
+		float y = m_vertices[i].y;
+
+		if(y < ymin) ymin = y;
+		if(y > ymax) ymax = y;
+	}
+
+#endif
+
+	float sx = 2.0f * rt->m_scale.x / (rt->m_texture.m_desc.Width * 16); // same scale and offset terms as the vertex shader constants built in Draw()
+	float sy = 2.0f * rt->m_scale.y / (rt->m_texture.m_desc.Height * 16);
+	float ox = (float)(int)m_context->XYOFFSET.OFX;
+	float oy = (float)(int)m_context->XYOFFSET.OFY;
+
+	xmin = xmin * sx - (ox * sx + 1);
+	xmax = xmax * sx - (ox * sx + 1);
+	ymin = ymin * sy - (oy * sy + 1);
+	ymax = ymax * sy - (oy * sy + 1);
+
+	if(xmin < -1) xmin = -1; // limit the box to [-1, +1] on both axes
+	if(xmax > +1) xmax = +1;
+	if(ymin < -1) ymin = -1;
+	if(ymax > +1) ymax = +1;
+	
+	umin = (xmin + 1) / 2; // [-1, +1] mapped to [0, 1] for sampling the render target
+	umax = (xmax + 1) / 2;
+	vmin = (ymin + 1) / 2;
+	vmax = (ymax + 1) / 2;
+
+	// }
+
+	// om
+
+	GSTexture2D tmp; // temporary color target of the render target's size, handed back with Recycle() below
+
+	m_dev.CreateRenderTarget(tmp, rt->m_texture.m_desc.Width, rt->m_texture.m_desc.Height);
+
+	m_dev->ClearDepthStencilView(ds->m_texture, D3D10_CLEAR_STENCIL, 0, 0); // only the stencil part is cleared
+
+	m_dev.OMSetRenderTargets(tmp, ds->m_texture);
+
+	m_dev.OMSet(m_date.dss, 1, m_date.bs, 0); // state objects created in Create(); NOTE(review): the 1 is presumably the stencil reference - confirm
+
+	// ia
+
+	VertexPT1 vertices[] = // quad over the bounding box, with y negated
+	{
+		{xmin, -ymin, 0.5f, 1.0f, umin, vmin},
+		{xmax, -ymin, 0.5f, 1.0f, umax, vmin},
+		{xmin, -ymax, 0.5f, 1.0f, umin, vmax},
+		{xmax, -ymax, 0.5f, 1.0f, umax, vmax},
+	};
+
+	m_dev.IASet(m_dev.m_convert.vb, 4, vertices, m_dev.m_convert.il, D3D10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP);
+
+	// vs
+
+	m_dev.VSSet(m_dev.m_convert.vs, NULL);
+
+	// gs
+
+	m_dev.GSSet(NULL);
+
+	// ps
+
+	m_dev.PSSetShaderResources(rt->m_texture, NULL); // the current render target is the shader input
+
+	m_dev.PSSet(m_dev.m_convert.ps[m_context->TEST.DATM ? 2 : 3], m_dev.m_ss_point); // DATM selects convert shader 2, otherwise 3
+
+	// rs
+
+	m_dev.RSSet(tmp.m_desc.Width, tmp.m_desc.Height);
+
+	// set
+
+	m_dev->Draw(4, 0);
+
+	//
+
+	m_dev.EndScene();
+
+	m_dev.Recycle(tmp);
+}
+
+bool GSRendererHW::DetectBadFrame() // returns true when the current draw must be skipped; m_skip holds how many draws are still to be skipped
+{
+	DWORD FBP = m_context->FRAME.Block();
+	DWORD FPSM = m_context->FRAME.PSM;
+
+	bool TME = PRIM->TME;
+	DWORD TBP0 = m_context->TEX0.TBP0;
+	DWORD TPSM = m_context->TEX0.PSM;
+
+	switch(m_crc) // per-game rules, keyed by game CRC
+	{
+	case 0x21068223: // okami ntsc/us
+	case 0x891f223f: // okami pal/fr
+
+		if(m_skip == 0)
+		{
+			if(TME && FBP == 0x00e00 && FPSM == PSM_PSMCT32 && TBP0 == 0x00000 && TPSM == PSM_PSMCT32)
+			{
+				m_skip = 1000; // large count: in practice ended by the matching draw below or by Flip()
+			}
+		}
+		else
+		{
+			if(TME && FBP == 0x00e00 && FPSM == PSM_PSMCT32 && TBP0 == 0x03800 && TPSM == PSM_PSMT4)
+			{
+				m_skip = 0;
+			}
+		}
+
+		break;
+
+	case 0x053D2239: // mgs3s1 ntsc/us
+	// TODO: case 0x086273D2: mgs3 snake eater pal/fr
+
+		if(m_skip == 0)
+		{
+			if(TME && FBP == 0x02000 && FPSM == PSM_PSMCT32 && (TBP0 == 0x00000 || TBP0 == 0x01000) && TPSM == PSM_PSMCT24)
+			{
+				m_skip = 1000; // 76, 79
+			}
+			else if(TME && FBP == 0x02800 && FPSM == PSM_PSMCT24 && (TBP0 == 0x00000 || TBP0 == 0x01000) && TPSM == PSM_PSMCT32)
+			{
+				m_skip = 1000; // 69
+			}
+		}
+		else 
+		{
+			if(!TME && (FBP == 0x00000 || FBP == 0x01000) && FPSM == PSM_PSMCT32)
+			{
+				m_skip = 0;
+			}
+		}
+
+		break;
+
+	case 0x278722BF: // dbz bt2 ntsc/us
+
+		if(m_skip == 0)
+		{
+			if(TME && /*FBP == 0x00000 && FPSM == PSM_PSMCT16 &&*/ TBP0 == 0x02000 && TPSM == PSM_PSMZ16)
+			{
+				m_skip = 27;
+			}
+		}
+
+		break;
+
+	case 0x72B3802A: // sfex3 ntsc/us
+
+		if(m_skip == 0)
+		{
+			if(TME && FBP == 0x00f00 && FPSM == PSM_PSMCT16 && (TBP0 == 0x00500 || TBP0 == 0x00000) && TPSM == PSM_PSMCT32)
+			{
+				m_skip = 4;
+			}
+		}
+
+		break;
+
+	case 0x28703748: // bully ntsc/us
+
+		if(m_skip == 0)
+		{
+			if(TME && (FBP == 0x00000 || FBP == 0x01180) && (TBP0 == 0x00000 || TBP0 == 0x01180) && FBP == TBP0 && FPSM == PSM_PSMCT32 && FPSM == TPSM)
+			{
+				return false; // allowed for bully (was "return true", i.e. the same skip the shared-bits check below would have caused)
+			}
+
+			if(TME && (FBP == 0x00000 || FBP == 0x01180) && FPSM == PSM_PSMCT16S && TBP0 == 0x02300 && TPSM == PSM_PSMZ16S)
+			{
+				m_skip = 6;
+			}
+		}
+		else 
+		{
+			if(!TME && (FBP == 0x00000 || FBP == 0x01180) && FPSM == PSM_PSMCT32)
+			{
+				m_skip = 0;
+			}
+		}
+
+		break;
+
+	case 0xC19A374E: // shadow of the colossus ntsc/us
+
+		if(m_skip == 0)
+		{
+			if(TME && FBP == 0x02b80 && FPSM == PSM_PSMCT24 && TBP0 == 0x01e80 && TPSM == PSM_PSMCT24)
+			{
+				m_skip = 9;
+			}
+			else if(TME && FBP == 0x01e80 && FPSM == PSM_PSMCT32 && TBP0 == 0x03880 && TPSM == PSM_PSMCT32)
+			{
+				m_skip = 8;
+			}
+		}
+
+		break;
+	}
+
+	if(m_skip == 0) // generic rules, applied when no skip is pending
+	{
+		if(TME)
+		{
+			if(HasSharedBits(FBP, FPSM, TBP0, TPSM)) // texture and frame buffer share bits: skip this one draw
+			{
+				m_skip = 1;
+			}
+
+			// depth textures (bully, mgs3s1 intro)
+
+			if(TPSM == PSM_PSMZ32 || TPSM == PSM_PSMZ24 || TPSM == PSM_PSMZ16 || TPSM == PSM_PSMZ16S)
+			{
+				// m_perfmon.Put(GSPerfMon::DepthTexture);
+				m_skip = 1;
+			}
+		}
+	}
+
+	if(m_skip > 0) // consume one pending skip
+	{
+		m_skip--;
+
+		return true;
+	}
+
+	return false;
+}
+
+bool GSRendererHW::OverrideInput(int& prim, GSTextureCache::GSTexture* tex) // game-specific rewrite of the queued draw; false cancels the draw
+{
+	#pragma region ffxii pal video conversion
+
+	if(m_crc == 0x78da0252 || m_crc == 0xc1274668 || m_crc == 0xdc2a467e || m_crc == 0xca284668)
+	{
+		static DWORD* video = NULL; // 512x512 staging image, allocated on first use and kept for the process lifetime
+		static bool ok = false; // true once a frame has been captured and not yet shown
+
+		if(prim == GS_POINTLIST && m_count >= 448*448 && m_count <= 448*512)
+		{
+			// incoming pixels are stored in columns, one column is 16x512, total res 448x512 or 448x454
+
+			if(!video) video = new DWORD[512*512];
+
+			for(int x = 0, i = 0, rows = m_count / 448; x < 448; x += 16) // one 16-pixel wide column per iteration
+			{
+				DWORD* dst = &video[x];
+
+				for(int y = 0; y < rows; y++, dst += 512) // rows are 512 pixels apart in the staging image
+				{
+					for(int j = 0; j < 16; j++, i++)
+					{
+						dst[j] = m_vertices[i].c; // only the point's color is kept
+					}
+				}
+			}
+
+			ok = true;
+
+			return false; // the points themselves are never drawn
+		}
+		else if(prim == GS_LINELIST && m_count == 512*2 && ok && tex) // tex is NULL when PRIM->TME is off; it is dereferenced below
+		{
+			// normally, this step would copy the video onto screen with 512 texture mapped horizontal lines,
+			// but we use the stored video data to create a new texture, and replace the lines with two triangles
+
+			ok = false;
+
+			m_dev.Recycle(tex->m_texture);
+			m_dev.Recycle(tex->m_palette);
+
+			m_dev.CreateTexture(tex->m_texture, 512, 512);
+
+			D3D10_BOX box = {0, 0, 0, 448, 512, 1}; // left, top, front, right, bottom, back
+
+			m_dev->UpdateSubresource(tex->m_texture, 0, &box, video, 512*4, 0); // row pitch: 512 DWORDs
+
+			// m_vertices[0] already holds the first corner (the former self-assignment was a no-op)
+			m_vertices[1] = m_vertices[m_count - 1];
+
+			prim = GS_SPRITE;
+			m_count = 2;
+
+			return true;
+		}
+	}
+
+	#pragma endregion
+
+	return true;
+}
+
diff --git a/gsdx10/GSRendererHW.h b/gsdx10/GSRendererHW.h
new file mode 100644
index 0000000..683c817
--- /dev/null
+++ b/gsdx10/GSRendererHW.h
@@ -0,0 +1,63 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSRenderer.h"
+#include "GSTextureCache.h"
+#include "GSTextureFX.h"
+#include "GSVertexHW.h"
+
+class GSRendererHW : public GSRendererT<GSVertexHW> // Direct3D 10 hardware renderer
+{
+	friend class GSTextureCache;
+
+protected:
+	int m_width; // size requested from the texture cache for render targets and depth buffers
+	int m_height;
+	int m_skip; // number of upcoming draws DetectBadFrame() will still skip; reset to 0 by Flip()
+
+	GSTextureCache m_tc; // source of render targets, depth-stencil buffers and textures
+	GSTextureFX m_tfx; // sets up the pipeline stages for each draw
+
+	void VertexKick(bool skip);
+	void DrawingKick(bool skip);
+	void Draw();
+	void Flip();
+	void InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, CRect r);
+	void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, CRect r);
+	void MinMaxUV(int w, int h, CRect& r);
+
+	struct // state objects created in Create() and used by SetupDATE()
+	{
+		CComPtr<ID3D10DepthStencilState> dss;
+		CComPtr<ID3D10BlendState> bs;
+	} m_date;
+
+	void SetupDATE(GSTextureCache::GSRenderTarget* rt, GSTextureCache::GSDepthStencil* ds);
+	bool OverrideInput(int& prim, GSTextureCache::GSTexture* tex);	
+	bool DetectBadFrame();
+
+public:
+	GSRendererHW(BYTE* base, bool mt, void (*irq)(), bool nloophack);
+
+	bool Create(LPCTSTR title);
+};
\ No newline at end of file
diff --git a/gsdx10/GSRendererNull.cpp b/gsdx10/GSRendererNull.cpp
new file mode 100644
index 0000000..c6d105c
--- /dev/null
+++ b/gsdx10/GSRendererNull.cpp
@@ -0,0 +1,93 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSRendererNull.h"
+
+GSRendererNull::GSRendererNull(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+	: GSRendererT(base, mt, irq, nloophack) // every argument is forwarded unchanged to the base renderer
+{
+}
+
+GSRendererNull::~GSRendererNull()
+{ // this class adds no members of its own, so there is nothing to release here
+}
+
+void GSRendererNull::VertexKick(bool skip)
+{
+	m_vl.AddTail();
+	// The queued VertexNull carries no data; the base class then handles the kick itself.
+	__super::VertexKick(skip);
+}
+
+void GSRendererNull::DrawingKick(bool skip)
+{
+	// Null renderer: the vertex queue is accessed in the pattern each primitive
+	// type uses, but the vertices only land in a scratch object - nothing is drawn.
+	VertexNull discard;
+
+	switch(PRIM->PRIM)
+	{
+	case GS_POINTLIST:
+		m_vl.RemoveAt(0, discard);
+		break;
+
+	case GS_LINELIST:
+	case GS_SPRITE:
+		m_vl.RemoveAt(0, discard);
+		m_vl.RemoveAt(0, discard);
+		break;
+
+	case GS_LINESTRIP:
+		m_vl.RemoveAt(0, discard);
+		m_vl.GetAt(0, discard);
+		break;
+
+	case GS_TRIANGLELIST:
+		for(int n = 0; n < 3; n++) m_vl.RemoveAt(0, discard);
+		break;
+
+	case GS_TRIANGLESTRIP:
+		m_vl.RemoveAt(0, discard);
+		m_vl.GetAt(0, discard);
+		m_vl.GetAt(1, discard);
+		break;
+
+	case GS_TRIANGLEFAN:
+		// slot 0 is only read here; slot 1 is the one that gets removed
+		m_vl.GetAt(0, discard);
+		m_vl.RemoveAt(1, discard);
+		m_vl.GetAt(1, discard);
+		break;
+
+	default:
+		ASSERT(0);
+		m_vl.RemoveAll();
+		return;
+	}
+	if(skip) return; // the counter that followed is disabled: m_perfmon.Put(GSPerfMon::Prim, 1);
+}
+
+void GSRendererNull::Flip()
+{
+	FlipInfo rt[2]; // passed on exactly as declared: this renderer never fills in any frame data
+	FinishFlip(rt);
+}
diff --git a/gsdx10/GSRendererNull.h b/gsdx10/GSRendererNull.h
new file mode 100644
index 0000000..09c2e82
--- /dev/null
+++ b/gsdx10/GSRendererNull.h
@@ -0,0 +1,39 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSRenderer.h"
+
+struct VertexNull {}; // empty vertex type: the null renderer keeps no per-vertex data
+// GSRendererNull: a GSRendererT instantiation whose Draw() is empty; see the .cpp for the queue handling.
+class GSRendererNull : public GSRendererT<VertexNull>
+{
+protected:
+	void VertexKick(bool skip);
+	void DrawingKick(bool skip);
+	void Draw() {}
+	void Flip();
+
+public:
+	GSRendererNull(BYTE* base, bool mt, void (*irq)(), bool nloophack);
+	virtual ~GSRendererNull();
+};
\ No newline at end of file
diff --git a/gsdx10/GSRendererSW.cpp b/gsdx10/GSRendererSW.cpp
new file mode 100644
index 0000000..c7474db
--- /dev/null
+++ b/gsdx10/GSRendererSW.cpp
@@ -0,0 +1,1039 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSRendererSW.h"
+
+#pragma warning(push)
+#pragma warning(disable: 4701)
+
+template <class Vertex> 
+GSRendererSW<Vertex>::GSRendererSW(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+	: GSRendererT(base, mt, irq, nloophack)
+{
+	int i = SHRT_MIN; // value being mapped; the three loops together cover SHRT_MIN..SHRT_MAX
+	int j = 0; // table slot for i (j == i - SHRT_MIN)
+	for(; i < 0; i++, j++) {m_clip[j] = 0; m_mask[j] = (BYTE)i;} // negative: m_clip saturates to 0, m_mask keeps the low byte
+	for(; i < 256; i++, j++) {m_clip[j] = (BYTE)i; m_mask[j] = (BYTE)i;} // 0..255: both tables pass the value through
+	for(; i <= SHRT_MAX; i++, j++) {m_clip[j] = 255; m_mask[j] = (BYTE)i;} // above 255: m_clip saturates to 255
+	// m_uv is allocated 16-byte aligned (DrawVertexTFX reads it with aligned SSE loads) and freed in the destructor.
+	m_uv = (uv_wrap_t*)_aligned_malloc(sizeof(uv_wrap_t), 16);
+	// The macros below store a pointer to every DrawVertex<iZTST, iATST> instantiation in m_dv[iZTST][iATST] ...
+	// w00t :P
+	// ... and to every DrawVertexTFX<iLOD, bLCM, bTCC, iTFX> in m_dvtfx, so Draw() can pick one by table lookup.
+	#define InitATST(iZTST, iATST) \
+		m_dv[iZTST][iATST] = &GSRendererSW<Vertex>::DrawVertex<iZTST, iATST>; \
+
+	#define InitZTST(iZTST) \
+		InitATST(iZTST, 0) \
+		InitATST(iZTST, 1) \
+		InitATST(iZTST, 2) \
+		InitATST(iZTST, 3) \
+		InitATST(iZTST, 4) \
+		InitATST(iZTST, 5) \
+		InitATST(iZTST, 6) \
+		InitATST(iZTST, 7) \
+
+	#define InitDV() \
+		InitZTST(0) \
+		InitZTST(1) \
+		InitZTST(2) \
+		InitZTST(3) \
+
+	InitDV();
+
+	#define InitTFX(iLOD, bLCM, bTCC, iTFX) \
+		m_dvtfx[iLOD][bLCM][bTCC][iTFX] = &GSRendererSW<Vertex>::DrawVertexTFX<iLOD, bLCM, bTCC, iTFX>; \
+
+	#define InitTCC(iLOD, bLCM, bTCC) \
+		InitTFX(iLOD, bLCM, bTCC, 0) \
+		InitTFX(iLOD, bLCM, bTCC, 1) \
+		InitTFX(iLOD, bLCM, bTCC, 2) \
+		InitTFX(iLOD, bLCM, bTCC, 3) \
+
+	#define InitLCM(iLOD, bLCM) \
+		InitTCC(iLOD, bLCM, false) \
+		InitTCC(iLOD, bLCM, true) \
+
+	#define InitLOD(iLOD) \
+		InitLCM(iLOD, false) \
+		InitLCM(iLOD, true) \
+
+	#define InitDVTFX() \
+		InitLOD(0) \
+		InitLOD(1) \
+		InitLOD(2) \
+		InitLOD(3) \
+
+
+	InitDVTFX();
+}
+
+template <class Vertex>
+GSRendererSW<Vertex>::~GSRendererSW()
+{
+	_aligned_free(m_uv); // pairs with the _aligned_malloc in the constructor
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::DrawingKick(bool skip)
+{
+	// Builds one primitive from the vertex queue at m_vertices[m_count]. It is committed (m_count advanced)
+	// unless 'skip' is set or the primitive lies entirely on one outer side of the scissor rectangle.
+	Vertex* v = &m_vertices[m_count];
+	int nv = 0;
+
+	switch(PRIM->PRIM)
+	{
+	case GS_POINTLIST:
+		m_vl.RemoveAt(0, v[0]);
+		nv = 1;
+		break;
+	case GS_LINELIST:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.RemoveAt(0, v[1]);
+		nv = 2;
+		break;
+	case GS_LINESTRIP:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.GetAt(0, v[1]);
+		nv = 2;
+		break;
+	case GS_TRIANGLELIST:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.RemoveAt(0, v[1]);
+		m_vl.RemoveAt(0, v[2]);
+		nv = 3;
+		break;
+	case GS_TRIANGLESTRIP:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.GetAt(0, v[1]);
+		m_vl.GetAt(1, v[2]);
+		nv = 3;
+		break;
+	case GS_TRIANGLEFAN:
+		m_vl.GetAt(0, v[0]);
+		m_vl.RemoveAt(1, v[1]);
+		m_vl.GetAt(1, v[2]);
+		nv = 3;
+		break;
+	case GS_SPRITE:
+		m_vl.RemoveAt(0, v[0]);
+		m_vl.RemoveAt(0, v[1]);
+		nv = 4; // the two queued corners are expanded into the four corners of the rectangle
+		v[0].p.z = v[1].p.z; // p.z, p.q and t.z of the whole sprite come from the second vertex
+		v[0].p.q = v[1].p.q;
+		v[0].t.z = v[1].t.z;
+		v[2] = v[1];
+		v[3] = v[1];
+		v[1].p.y = v[0].p.y;
+		v[1].t.y = v[0].t.y;
+		v[2].p.x = v[0].p.x;
+		v[2].t.x = v[0].t.x;
+		break;
+	default:
+		ASSERT(0);
+		// Flush the queue, as GSRendererNull::DrawingKick does, so stale vertices cannot leak into later primitives.
+		m_vl.RemoveAll();
+		return;
+	}
+	// The vertices above are consumed from the queue even when the primitive is skipped.
+	if(skip) return;
+
+	Vertex::Scalar sx0((int)m_context->SCISSOR.SCAX0);
+	Vertex::Scalar sy0((int)m_context->SCISSOR.SCAY0);
+	Vertex::Scalar sx1((int)m_context->SCISSOR.SCAX1);
+	Vertex::Scalar sy1((int)m_context->SCISSOR.SCAY1);
+	switch(nv)
+	{
+	case 1:
+		if(v[0].p.x < sx0
+		|| v[0].p.x > sx1
+		|| v[0].p.y < sy0
+		|| v[0].p.y > sy1)
+			return;
+		break;
+	case 2:
+		if(v[0].p.x < sx0 && v[1].p.x < sx0
+		|| v[0].p.x > sx1 && v[1].p.x > sx1
+		|| v[0].p.y < sy0 && v[1].p.y < sy0
+		|| v[0].p.y > sy1 && v[1].p.y > sy1)
+			return;
+		break;
+	case 3:
+		if(v[0].p.x < sx0 && v[1].p.x < sx0 && v[2].p.x < sx0
+		|| v[0].p.x > sx1 && v[1].p.x > sx1 && v[2].p.x > sx1
+		|| v[0].p.y < sy0 && v[1].p.y < sy0 && v[2].p.y < sy0
+		|| v[0].p.y > sy1 && v[1].p.y > sy1 && v[2].p.y > sy1)
+			return;
+		break;
+	case 4:
+		if(v[0].p.x < sx0 && v[3].p.x < sx0
+		|| v[0].p.x > sx1 && v[3].p.x > sx1
+		|| v[0].p.y < sy0 && v[3].p.y < sy0
+		|| v[0].p.y > sy1 && v[3].p.y > sy1)
+			return;
+		break;
+	default:
+		__assume(0);
+	}
+
+	// Flat shading (IIP == 0) and sprites: every vertex takes the colour of the last one.
+	if(PRIM->IIP == 0 || PRIM->PRIM == GS_SPRITE)
+	{
+		Vertex::Vector c = v[nv - 1].c;
+
+		for(int i = 0; i < nv - 1; i++) 
+		{
+			v[i].c = c;
+		}
+	}
+	m_count += nv;
+}
+/*
+extern int s_n;
+extern bool s_dump;
+extern bool s_save;
+*/
+
+static int bZTE; // , iZTST, iATST, iLOD, bLCM, bTCC, iTFX; -- bZTE is written by Draw() and read by RowInit()/RowStep()
+
+template <class Vertex>
+void GSRendererSW<Vertex>::Draw() // rasterises the m_count vertices that DrawingKick() stored in m_vertices
+{
+/*
+if(s_dump)
+{
+	CString str;
+	str.Format(_T("c:\\temp1\\_%05d_f%I64d_tex_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
+	if(PRIM->TME) if(s_save) {m_mem.SetupCLUT32(m_context->TEX0, m_env.TEXA); m_mem.SaveBMP(m_dev, str, m_context->TEX0.TBP0, m_context->TEX0.TBW, m_context->TEX0.PSM, 1 << m_context->TEX0.TW, 1 << m_context->TEX0.TH);}
+
+	str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt0_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
+	if(s_save) {m_mem.SaveBMP(m_dev, str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, m_regs.GetFrameSize(1).cx, 512);}//m_regs.GetFrameSize(1).cy);
+}
+*/
+	bZTE = m_context->TEST.ZTE && m_context->TEST.ZTST >= 2 || !m_context->ZBUF.ZMSK; // z is compared (ZTST >= 2) or written
+	// A disabled depth/alpha test is mapped to mode 1, which is the always-pass case in DrawVertex.
+	int iZTST = !m_context->TEST.ZTE ? 1 : m_context->TEST.ZTST;
+	int iATST = !m_context->TEST.ATE ? 1 : m_context->TEST.ATST;
+
+	m_pDrawVertex = m_dv[iZTST][iATST];
+
+	if(PRIM->TME)
+	{
+		int iLOD = (m_context->TEX1.MMAG & 1) + (m_context->TEX1.MMIN & 1);
+		int bLCM = m_context->TEX1.LCM ? 1 : 0;
+		int bTCC = m_context->TEX0.TCC ? 1 : 0;
+		int iTFX = m_context->TEX0.TFX;
+		// FST: iLOD is forced to 3 and bLCM is reused for the filter choice (MMAG when K <= 0, MMIN otherwise).
+		if(PRIM->FST)
+		{
+			iLOD = 3;
+			bLCM = m_context->TEX1.K <= 0 && (m_context->TEX1.MMAG & 1) || m_context->TEX1.K > 0 && (m_context->TEX1.MMIN & 1);
+		}
+		// When m_filter is not linear, both paths end up with bilinear sampling switched off in DrawVertexTFX.
+		if(m_filter != D3D10_FILTER_TYPE_LINEAR)
+		{
+			if(iLOD == 3) bLCM = 0;
+			else iLOD = 0;
+		}
+
+		m_pDrawVertexTFX = m_dvtfx[iLOD][bLCM][bTCC][iTFX];
+
+		SetupTexture();
+	}
+	// Scissor rectangle with exclusive right/bottom (SCAX1 + 1, SCAY1 + 1), limited to FBW * 64 columns and 4096 rows.
+	m_scissor.SetRect(
+		max(m_context->SCISSOR.SCAX0, 0),
+		max(m_context->SCISSOR.SCAY0, 0),
+		min(m_context->SCISSOR.SCAX1 + 1, m_context->FRAME.FBW * 64),
+		min(m_context->SCISSOR.SCAY1 + 1, 4096));
+	// m_clamp points 32768 entries into the table so that it can be indexed directly with a signed 16-bit value.
+	m_clamp = (m_env.COLCLAMP.CLAMP ? m_clip : m_mask) + 32768;
+
+	int prims = 0;
+
+	Vertex* vertices = m_vertices;
+
+	switch(PRIM->PRIM)
+	{
+	case GS_POINTLIST:
+		prims = m_count;
+		for(int i = 0; i < prims; i++, vertices++) DrawPoint(vertices);
+		break;
+	case GS_LINELIST: 
+	case GS_LINESTRIP: 
+		ASSERT(!(m_count & 1));
+		prims = m_count / 2;
+		for(int i = 0; i < prims; i++, vertices += 2) DrawLine(vertices);
+		break;
+	case GS_TRIANGLELIST: 
+	case GS_TRIANGLESTRIP: 
+	case GS_TRIANGLEFAN:
+		ASSERT(!(m_count % 3));
+		prims = m_count / 3;
+		for(int i = 0; i < prims; i++, vertices += 3) DrawTriangle(vertices);
+		break;
+	case GS_SPRITE:
+		ASSERT(!(m_count & 3));
+		prims = m_count / 4;
+		for(int i = 0; i < prims; i++, vertices += 4) DrawSprite(vertices);
+		break;
+	default:
+		__assume(0);
+	}
+/*
+	m_perfmon.Put(GSPerfMon::Prim, nPrims);
+	m_perfmon.Put(GSPerfMon::Draw, 1);
+
+if(s_dump)
+{
+	CString str;
+	str.Format(_T("c:\\temp1\\_%05d_f%I64d_rt1_%05x_%d.bmp"), s_n++, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
+	if(s_save) {m_mem.SaveBMP(m_dev, str, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, m_regs.GetFrameSize(1).cx, 512);}//m_regs.GetFrameSize(1).cy);
+}
+*/
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::Flip()
+{
+	// Copies each enabled display circuit out of GS local memory into a D3D10
+	// texture, going through a shared staging buffer, then calls FinishFlip().
+	HRESULT hr;
+
+	FlipInfo src[2];
+	// Staging buffer geometry: maxh rows of maxw 32-bit pixels.
+	static const int maxw = 1024, maxh = 1024;
+
+	for(int i = 0; i < countof(src); i++)
+	{
+		if(!IsEnabled(i)) continue;
+
+		int w = DISPFB[i]->FBW * 64;
+		int h = GetFrameRect(i).bottom; // TODO: round up
+		//GSLocalMemory::RoundUp(, GSLocalMemory::GetBlockSize(DISPFB[i].PSM));
+		// FBW * 64 (and the frame height) can exceed the staging buffer, which let
+		// ReadTexture() write past its end; clamp the copied area to the buffer instead.
+		if(w > maxw) w = maxw;
+		if(h > maxh) h = maxh;
+
+		if(m_texture[i].m_desc.Width != (UINT)w || m_texture[i].m_desc.Height != (UINT)h)
+		{
+			m_texture[i] = GSTexture2D();
+		}
+
+		if(!m_texture[i])
+		{
+			hr = m_dev.CreateTexture(m_texture[i], w, h);
+			if(FAILED(hr)) continue;
+		}
+
+		GIFRegTEX0 TEX0;
+		TEX0.TBP0 = DISPFB[i]->Block();
+		TEX0.TBW = DISPFB[i]->FBW;
+		TEX0.PSM = DISPFB[i]->PSM;
+
+		GIFRegCLAMP CLAMP;
+		CLAMP.WMS = CLAMP.WMT = 1;
+
+		// Allocated on first use and intentionally kept for the lifetime of the process.
+		static BYTE* buff = (BYTE*)_aligned_malloc(maxw * maxh * 4, 16);
+		static int pitch = maxw * 4;
+		if(!buff) continue; // the one-time allocation failed: there is nothing to copy into
+
+		m_mem.ReadTexture(CRect(0, 0, w, h), buff, pitch, TEX0, m_env.TEXA, CLAMP);
+		D3D10_BOX box = {0, 0, 0, w, h, 1};
+		m_dev->UpdateSubresource(m_texture[i], 0, &box, buff, pitch, 0);
+
+		src[i].t = m_texture[i];
+		src[i].s = GSScale(1, 1);
+/*
+if(s_dump)
+{
+	CString str;
+	str.Format(_T("c:\\temp1\\_%05d_f%I64d_fr%d_%05x.bmp"), s_n++, m_perfmon.GetFrame(), i, (int)TEX0.TBP0);
+	if(s_save) ::D3DX10SaveTextureToFile(m_texture[i], D3DX10_IFF_BMP, str);
+}
+*/
+// s_dump = m_perfmon.GetFrame() >= 5000;
+	}
+
+	FinishFlip(src);
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::RowInit(int x, int y)
+{
+	// Park the cursor one pixel left of (x, y); the closing RowStep() moves onto
+	// (x, y) and resolves the first frame (and, if tracked, z) address of the run.
+	m_fy = y;
+	m_fx = x - 1;
+	m_faddr_ro = &m_context->ftbl->rowOffset[y & 7][x];
+	m_faddr_x0 = (m_context->ftbl->pa)(0, y, m_context->FRAME.Block(), m_context->FRAME.FBW);
+
+	if(bZTE) // z addresses are only tracked when Draw() decided that z is compared or written
+	{
+		m_zaddr_ro = &m_context->ztbl->rowOffset[y & 7][x];
+		m_zaddr_x0 = (m_context->ztbl->pa)(0, y, m_context->ZBUF.Block(), m_context->FRAME.FBW);
+	}
+	RowStep();
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::RowStep()
+{
+	// One pixel to the right: consume the next row-offset entry of each tracked buffer.
+	m_faddr = m_faddr_x0 + *m_faddr_ro;
+	++m_faddr_ro;
+	++m_fx;
+
+	if(!bZTE) return;
+	m_zaddr = m_zaddr_x0 + *m_zaddr_ro;
+	++m_zaddr_ro;
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::DrawPoint(Vertex* v)
+{
+	// Plots a single vertex, provided its integer position lies inside the scissor.
+	const CPoint pos = *v;
+	if(!m_scissor.PtInRect(pos))
+	{
+		return;
+	}
+	RowInit(pos.x, pos.y);
+	(this->*m_pDrawVertex)(*v);
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::DrawLine(Vertex* v)
+{
+	// Steps from v[0] towards v[1] in unit increments of the longer axis, plotting one pixel per step.
+	Vertex delta = v[1] - v[0];
+
+	Vertex::Vector extent = delta.p;
+	extent.x.abs();
+	extent.y.abs();
+
+	const int spanx = (int)extent.x;
+	const int spany = (int)extent.y;
+
+	if(spanx == 0 && spany == 0)
+	{
+		return;
+	}
+
+	// index into Vector::v of the axis to step along: 0 when the x extent is larger, 1 otherwise
+	const int major = spanx > spany ? 0 : 1;
+
+	Vertex cursor = v[0];
+	Vertex step = delta / extent.v[major];
+
+	// TODO: clip with the scissor
+	for(int remaining = (int)extent.v[major]; remaining > 0; remaining--)
+	{
+		CPoint pos = cursor;
+
+		if(m_scissor.PtInRect(pos))
+		{
+			RowInit(pos.x, pos.y);
+			(this->*m_pDrawVertex)(cursor);
+		}
+
+		cursor += step;
+	}
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::DrawTriangle(Vertex* v)
+{
+	if(v[1].p.y < v[0].p.y) {Vertex::Exchange(&v[0], &v[1]);}
+	if(v[2].p.y < v[0].p.y) {Vertex::Exchange(&v[0], &v[2]);}
+	if(v[2].p.y < v[1].p.y) {Vertex::Exchange(&v[1], &v[2]);}
+	// v[0..2] are now ordered by ascending y (the caller's array is reordered in place).
+	if(!(v[0].p.y < v[2].p.y)) return; // zero height: nothing to draw
+
+	Vertex v01 = v[1] - v[0];
+	Vertex v02 = v[2] - v[0];
+	// temp: fraction of the v0->v2 edge at v[1]'s height; longest: signed x gap between that edge and v[1] there.
+	Vertex::Scalar temp = v01.p.y / v02.p.y;
+	Vertex::Scalar longest = temp * v02.p.x - v01.p.x;
+	// |longest| is forced to at least 1 because it is the divisor for dscan below; exactly 0 means a degenerate triangle.
+	int ledge, redge;
+	if(Vertex::Scalar(0) < longest) {ledge = 0; redge = 1; if(longest < Vertex::Scalar(1)) longest = Vertex::Scalar(1);}
+	else if(longest < Vertex::Scalar(0)) {ledge = 1; redge = 0; if(Vertex::Scalar(-1) < longest) longest = Vertex::Scalar(-1);}
+	else return;
+	// ledge indexes the v0->v1->v2 side, redge the long v0->v2 side; edge[0] is always scanned as the left boundary.
+	Vertex edge[2] = {v[0], v[0]};
+
+	Vertex dedge[2];
+	dedge[0].p.y = dedge[1].p.y = Vertex::Scalar(1);
+	if(Vertex::Scalar(0) < v01.p.y) dedge[ledge] = v01 / v01.p.y;
+	if(Vertex::Scalar(0) < v02.p.y) dedge[redge] = v02 / v02.p.y;
+
+	Vertex scan;
+	// dscan: vertex increment per pixel along a scanline.
+	Vertex dscan = (v02 * temp - v01) / longest;
+	dscan.p.y = 0;
+	// Two passes: i == 0 covers v[0]..v[1], i == 1 covers v[1]..v[2] (v itself is advanced after each pass).
+	for(int i = 0; i < 2; i++, v++)
+	{ 
+		int top = edge[0].p.y.ceil_i();
+		int bottom = v[1].p.y.ceil_i();
+
+		if(top < m_scissor.top) top = min(m_scissor.top, bottom);
+		if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
+
+		if(edge[0].p.y < Vertex::Scalar(top)) // for(int j = 0; j < 2; j++) edge[j] += dedge[j] * ((float)top - edge[0].p.y);
+		{
+			Vertex::Scalar dy = Vertex::Scalar(top) - edge[0].p.y;
+			edge[0] += dedge[0] * dy;
+			edge[1].p.x += dedge[1].p.x * dy;
+			edge[0].p.y = edge[1].p.y = Vertex::Scalar(top);
+		}
+
+		ASSERT(top >= bottom || (int)((edge[1].p.y - edge[0].p.y) * 10) == 0);
+
+		for(; top < bottom; top++)
+		{
+			int left = edge[0].p.x.ceil_i();
+			int right = edge[1].p.x.ceil_i();
+
+			if(left < m_scissor.left) left = m_scissor.left;
+			if(right > m_scissor.right) right = m_scissor.right;
+
+			if(right > left)
+			{
+				scan = edge[0];
+
+				if(edge[0].p.x < Vertex::Scalar(left))
+				{
+					scan += dscan * (Vertex::Scalar(left) - edge[0].p.x);
+					scan.p.x = Vertex::Scalar(left);
+				}
+
+				RowInit(left, top);
+
+				for(int steps = right - left; steps > 0; steps--)
+				{
+					(this->*m_pDrawVertex)(scan);
+					scan += dscan;
+					RowStep();
+				}
+			}
+
+			// for(int j = 0; j < 2; j++) edge[j] += dedge[j];
+			edge[0] += dedge[0];
+			edge[1].p += dedge[1].p;
+		}
+		// Only after the first pass: on the second pass v[2] would be the vertex following this triangle.
+		if(i == 0 && v[1].p.y < v[2].p.y)
+		{
+			edge[ledge] = v[1];
+			dedge[ledge] = (v[2] - v[1]) / (v[2].p.y - v[1].p.y);
+			edge[ledge] += dedge[ledge] * (edge[ledge].p.y.ceil_s() - edge[ledge].p.y);
+		}
+	}
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::DrawSprite(Vertex* v)
+{
+	if(v[2].p.y < v[0].p.y) {Vertex::Exchange(&v[0], &v[2]); Vertex::Exchange(&v[1], &v[3]);}
+	if(v[1].p.x < v[0].p.x) {Vertex::Exchange(&v[0], &v[1]); Vertex::Exchange(&v[2], &v[3]);}
+	// After the swaps v[0] has the smallest x and y; v[1] spans the width from it and v[2] the height.
+	if(v[0].p.x == v[1].p.x || v[0].p.y == v[2].p.y) return; // zero width or height
+
+	Vertex v01 = v[1] - v[0];
+	Vertex v02 = v[2] - v[0];
+	// dedge: vertex increment per row; dscan: vertex increment per pixel within a row.
+	Vertex edge = v[0];
+	Vertex dedge = v02 / v02.p.y;
+	Vertex scan;
+	Vertex dscan = v01 / v01.p.x;
+
+	int top = v[0].p.y.ceil_i();
+	int bottom = v[2].p.y.ceil_i();
+
+	if(top < m_scissor.top) top = min(m_scissor.top, bottom);
+	if(bottom > m_scissor.bottom) bottom = m_scissor.bottom;
+	// Advance the start vertex to the first row that is actually drawn.
+	if(v[0].p.y < Vertex::Scalar(top)) edge += dedge * (Vertex::Scalar(top) - v[0].p.y);
+
+	int left = v[0].p.x.ceil_i();
+	int right = v[1].p.x.ceil_i();
+
+	if(left < m_scissor.left) left = m_scissor.left;
+	if(right > m_scissor.right) right = m_scissor.right;
+
+	if(left >= right || top >= bottom) return;
+	// Likewise advance it to the first column that is actually drawn.
+	if(v[0].p.x < Vertex::Scalar(left)) edge += dscan * (Vertex::Scalar(left) - v[0].p.x);
+	// DrawFilledRect() returns true when it has filled the rectangle itself; otherwise rasterise per pixel.
+	if(DrawFilledRect(left, top, right, bottom, edge))
+		return;
+
+	for(; top < bottom; top++)
+	{
+		scan = edge;
+
+		RowInit(left, top);
+
+		for(int steps = right - left; steps > 0; steps--)
+		{
+			(this->*m_pDrawVertex)(scan);
+			scan += dscan;
+			RowStep();
+		}
+
+		edge += dedge;
+	}
+}
+
+template <class Vertex>
+bool GSRendererSW<Vertex>::DrawFilledRect(int left, int top, int right, int bottom, const Vertex& v)
+{
+	// Fast path used by DrawSprite(): when no per-pixel work is required, fills the rectangle with one
+	// colour (and, unless ZMSK is set, one z value) taken from v. Returns false if the caller must rasterise.
+	if(left >= right || top >= bottom) return false;
+	ASSERT(top >= 0);
+	ASSERT(bottom >= 0);
+
+	if(PRIM->IIP && PRIM->PRIM != GS_SPRITE
+	|| m_context->TEST.ZTE && m_context->TEST.ZTST != 1
+	|| m_context->TEST.ATE && m_context->TEST.ATST != 1
+	|| m_context->TEST.DATE
+	|| PRIM->TME
+	|| PRIM->ABE
+	|| PRIM->FGE
+	|| m_env.DTHE.DTHE
+	|| m_context->FRAME.FBMSK)
+		return false;
+
+	DWORD FBP = m_context->FRAME.Block(), FBW = m_context->FRAME.FBW;
+	DWORD ZBP = m_context->ZBUF.Block();
+
+	if(!m_context->ZBUF.ZMSK)
+	{
+		m_mem.FillRect(CRect(left, top, right, bottom), v.GetZ(), m_context->ZBUF.PSM, ZBP, FBW);
+	}
+	// Rf/Gf/Bf/Af alias the four 16-bit lanes of Cui64, which receives the vertex colour.
+	__declspec(align(16)) union {struct {short Rf, Gf, Bf, Af;}; UINT64 Cui64;};
+	Cui64 = v.c;
+
+	Rf = m_clamp[Rf];
+	Gf = m_clamp[Gf];
+	Bf = m_clamp[Bf];
+	Af |= m_context->FBA.FBA << 7;
+
+	DWORD Cdw;
+	// 16-bit frame formats get a 5:5:5:1 value, everything else an 8:8:8:8 value.
+	if(m_context->FRAME.PSM == PSM_PSMCT16 || m_context->FRAME.PSM == PSM_PSMCT16S)
+	{
+		Cdw = ((DWORD)(Rf&0xf8) >> 3)
+			| ((DWORD)(Gf&0xf8) << 2) 
+			| ((DWORD)(Bf&0xf8) << 7) 
+			| ((DWORD)(Af&0x80) << 8);
+	}
+	else
+	{
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+		__m128i r0 = _mm_loadl_epi64((__m128i*)&Cui64); // 64-bit load: the union is only 8 bytes, a 128-bit load reads past it
+		Cdw = (DWORD)_mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
+#else
+		Cdw = ((DWORD)(Rf&0xff) << 0)
+			| ((DWORD)(Gf&0xff) << 8) 
+			| ((DWORD)(Bf&0xff) << 16) 
+			| ((DWORD)(Af&0xff) << 24);
+#endif
+	}
+
+	m_mem.FillRect(CRect(left, top, right, bottom), Cdw, m_context->FRAME.PSM, FBP, FBW);
+
+	return true;
+}
+
+template <class Vertex>
+template <int iZTST, int iATST> 
+void GSRendererSW<Vertex>::DrawVertex(const Vertex& v)
+{
+	DWORD vz;
+	// Per-pixel pipeline: depth test (iZTST) -> texture/fog -> alpha test (iATST) -> z write -> blend/dither -> masked frame write.
+	switch(iZTST)
+	{
+	case 0: return; // never passes
+	case 1: break; // always passes
+	case 2: // passes when vz >= stored z
+		vz = v.GetZ(); 
+		if(vz < m_mem.readPixelX(m_context->ZBUF.PSM, m_zaddr)) return; 
+		// if(vz < (m_mem.*m_context->ztbl->rpa)(m_zaddr)) return; 
+		break;
+	case 3: // passes when vz > stored z
+		vz = v.GetZ(); 
+		if(vz <= m_mem.readPixelX(m_context->ZBUF.PSM, m_zaddr)) return; 
+		// if(vz <= (m_mem.*m_context->ztbl->rpa)(m_zaddr)) return; 
+		break;
+	default:
+		__assume(0);
+	}
+	// Cf = fragment colour, Cd = destination colour, Ca = ALPHA.FIX; Cfda[] lets ALPHA.A/B/C/D select them by index.
+	union
+	{
+		struct {Vertex::Vector Cf, Cd, Ca;};
+		struct {Vertex::Vector Cfda[3];};
+	};
+	// NOTE(review): Cfda has three entries; an ALPHA.A/B/C/D selector of 3 would index past it - confirm the field ranges.
+	Cf = v.c;
+
+	if(PRIM->TME)
+	{
+		(this->*m_pDrawVertexTFX)(Cf, v);
+	}
+	// Fog: interpolate between FOGCOL and the fragment colour by v.t.z, leaving alpha untouched.
+	if(PRIM->FGE)
+	{
+		Vertex::Scalar a = Cf.a;
+		Vertex::Vector Cfog((DWORD)m_env.FOGCOL.ai32[0]);
+		Cf = Cfog + (Cf - Cfog) * v.t.z;
+		Cf.a = a;
+	}
+	// Local copies of the write masks: a failed alpha test may tighten them for this pixel only.
+	BOOL ZMSK = m_context->ZBUF.ZMSK;
+	DWORD FBMSK = m_context->FRAME.FBMSK;
+
+	bool fAlphaPass = true;
+
+	BYTE Af = (BYTE)(int)Cf.a;
+
+	switch(iATST)
+	{
+	case 0: fAlphaPass = false; break;
+	case 1: fAlphaPass = true; break;
+	case 2: fAlphaPass = Af < m_context->TEST.AREF; break;
+	case 3: fAlphaPass = Af <= m_context->TEST.AREF; break;
+	case 4: fAlphaPass = Af == m_context->TEST.AREF; break;
+	case 5: fAlphaPass = Af >= m_context->TEST.AREF; break;
+	case 6: fAlphaPass = Af > m_context->TEST.AREF; break;
+	case 7: fAlphaPass = Af != m_context->TEST.AREF; break;
+	default: __assume(0);
+	}
+	// AFAIL decides what a failed alpha test still lets through.
+	if(!fAlphaPass)
+	{
+		switch(m_context->TEST.AFAIL)
+		{
+		case 0: return;
+		case 1: ZMSK = 1; break; // RGBA
+		case 2: FBMSK = 0xffffffff; break; // Z
+		case 3: FBMSK = 0xff000000; ZMSK = 1; break; // RGB
+		default: __assume(0);
+		}
+	}
+
+	if(!ZMSK)
+	{
+		if(iZTST != 2 && iZTST != 3) vz = v.GetZ(); 
+		m_mem.writePixelX(m_context->ZBUF.PSM, m_zaddr, vz);
+		// (m_mem.*m_context->ztbl->wpa)(m_zaddr, vz);
+	}
+
+	if(FBMSK != ~0)
+	{
+		if(m_context->TEST.DATE && m_context->FRAME.PSM <= PSM_PSMCT16S && m_context->FRAME.PSM != PSM_PSMCT24)
+		{
+			DWORD c = m_mem.readPixelX(m_context->FRAME.PSM, m_faddr);
+			// DWORD c = (m_mem.*m_context->ftbl->rpa)(m_faddr);
+			BYTE A = (BYTE)(c >> (m_context->FRAME.PSM == PSM_PSMCT32 ? 31 : 15));
+			if(A ^ m_context->TEST.DATM) return;
+		}
+
+		// FIXME: for AA1 the value of Af should be calculated from the pixel coverage...
+
+		bool fABE = (PRIM->ABE || PRIM->AA1 && (PRIM->PRIM == 1 || PRIM->PRIM == 2)) && (!m_env.PABE.PABE || (int)Cf.a >= 0x80);
+		// The destination colour is fetched only when blending or a partial write mask needs it.
+		if(FBMSK || fABE)
+		{
+			GIFRegTEXA TEXA;
+			/*
+			TEXA.AEM = 0;
+			TEXA.TA0 = 0;
+			TEXA.TA1 = 0x80;
+			*/
+			TEXA.ai32[0] = 0;
+			TEXA.ai32[1] = 0x80;
+
+			Cd = m_mem.readTexelX(m_context->FRAME.PSM, m_faddr, TEXA);
+			// Cd = (m_mem.*m_context->ftbl->rta)(m_faddr, TEXA);
+		}
+
+		if(fABE)
+		{
+			Ca = Vertex::Vector(Vertex::Scalar(0));
+			Ca.a = Vertex::Scalar((int)m_context->ALPHA.FIX);
+
+			Vertex::Scalar a = Cf.a;
+			Cf = ((Cfda[m_context->ALPHA.A] - Cfda[m_context->ALPHA.B]) * Cfda[m_context->ALPHA.C].a >> 7) + Cfda[m_context->ALPHA.D];
+			Cf.a = a;
+		}
+
+		DWORD Cdw; 
+
+		if(m_env.COLCLAMP.CLAMP && !m_env.DTHE.DTHE)
+		{
+			Cdw = Cf;
+		}
+		else
+		{
+			__declspec(align(16)) union {struct {short Rf, Gf, Bf, Af;}; UINT64 Cui64;};
+			Cui64 = Cf;
+
+			if(m_env.DTHE.DTHE)
+			{
+				short DMxy = (signed char)((*((WORD*)&m_env.DIMX.i64 + (m_fy&3)) >> ((m_fx&3)<<2)) << 5) >> 5;
+				Rf = (short)(Rf + DMxy);
+				Gf = (short)(Gf + DMxy);
+				Bf = (short)(Bf + DMxy);
+			}
+
+			Rf = m_clamp[Rf];
+			Gf = m_clamp[Gf];
+			Bf = m_clamp[Bf];
+			Af |= m_context->FBA.FBA << 7;
+			// 64-bit load below: the union above is only 8 bytes wide, so a 128-bit load would read past it.
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+			__m128i r0 = _mm_loadl_epi64((__m128i*)&Cui64);
+			Cdw = (DWORD)_mm_cvtsi128_si32(_mm_packus_epi16(r0, r0));
+#else
+			Cdw = ((DWORD)(Rf&0xff) << 0)
+				| ((DWORD)(Gf&0xff) << 8) 
+				| ((DWORD)(Bf&0xff) << 16) 
+				| ((DWORD)(Af&0xff) << 24);
+#endif
+		}
+		// Bits set in FBMSK keep the destination value.
+		if(FBMSK != 0)
+		{
+			Cdw = (Cdw & ~FBMSK) | ((DWORD)Cd & FBMSK);
+		}
+
+		m_mem.writeFrameX(m_context->FRAME.PSM, m_faddr, Cdw);
+		// (m_mem.*m_context->ftbl->wfa)(m_faddr, Cdw);
+	}
+}
+
+static const float s_one_over_log2 = 1.0f / log(2.0f); // 1 / ln(2): turns log() results into base-2 logarithms
+// DrawVertexTFX: samples the current texture at v.t and combines the texel into Cf according to iTFX and bTCC.
+template <class Vertex>
+template <int iLOD, bool bLCM, bool bTCC, int iTFX>
+void GSRendererSW<Vertex>::DrawVertexTFX(typename Vertex::Vector& Cf, const Vertex& v)
+{
+	ASSERT(PRIM->TME);
+	
+	Vertex::Vector t = v.t;
+	// iLOD 2: always bilinear; 1: decided per pixel below; 3: t is used as is and bLCM carries the decision; 0: never.
+	bool fBiLinear = iLOD == 2; 
+
+	if(iLOD == 3)
+	{
+		fBiLinear = !!bLCM;
+	}
+	else
+	{
+		t.q.rcp();
+		t *= t.q;
+		// NOTE(review): if `t *= t.q` also scales t.q itself, the log() below sees 1/q^2 rather than 1/q - confirm.
+		if(iLOD == 1)
+		{
+			float lod = (float)(int)m_context->TEX1.K;
+			if(!bLCM) lod += log(fabs((float)t.q)) * s_one_over_log2 * (1 << m_context->TEX1.L);
+			fBiLinear = lod <= 0 && (m_context->TEX1.MMAG & 1) || lod > 0 && (m_context->TEX1.MMIN & 1);
+		}
+	}
+	// Bilinear sampling starts half a texel up and to the left; presumably to centre the 2x2 footprint - confirm.
+	if(fBiLinear) t -= Vertex::Scalar(0.5f);
+	// ituv = {u, u + 1, v, v + 1} as 16-bit integers; the remaining four entries are zero-initialised.
+	__declspec(align(16)) short ituv[8] = 
+	{
+		(short)(int)t.x, 
+		(short)(int)t.x+1, 
+		(short)(int)t.y, 
+		(short)(int)t.y+1
+	};
+	// Per lane: the region value ((x & and) | or) is kept where mask is set, the min/max-clamped value elsewhere.
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+
+	__m128i uv = _mm_load_si128((__m128i*)ituv);
+	__m128i mask = _mm_load_si128((__m128i*)m_uv->mask);
+	__m128i region = _mm_or_si128(_mm_and_si128(uv, *(__m128i*)m_uv->and), *(__m128i*)m_uv->or);
+	__m128i clamp = _mm_min_epi16(_mm_max_epi16(uv, *(__m128i*)m_uv->min), *(__m128i*)m_uv->max);
+	_mm_store_si128((__m128i*)ituv, _mm_or_si128(_mm_and_si128(region, mask), _mm_andnot_si128(mask, clamp)));
+
+#else
+
+	for(int i = 0; i < 4; i++)
+	{
+		short region = (ituv[i] & m_uv->and[i]) | m_uv->or[i];
+		short clamp = ituv[i] < m_uv->min[i] ? m_uv->min[i] : ituv[i] > m_uv->max[i] ? m_uv->max[i] : ituv[i];
+		ituv[i] = (region & m_uv->mask[i]) | (clamp & ~m_uv->mask[i]);
+	}
+
+#endif
+
+	Vertex::Vector Ct[4];
+
+	if(fBiLinear)
+	{
+		for(int i = 0; i < 4; i++)
+		{
+			Ct[i] = m_mem.readTexelX(m_context->TEX0.PSM, ituv[i&1], ituv[2+(i>>1)], m_context->TEX0, m_env.TEXA);
+			// Ct[i] = (m_mem.*m_context->ttbl->rt)(ituv[i&1], ituv[2+(i>>1)], m_context->TEX0, m_env.TEXA);
+			// Ct[i] = m_pTexture[(ituv[2+(i>>1)] << m_context->TEX0.TW) + ituv[i&1]];
+		}
+		// ft: fractional part of t, used as the horizontal (x) and vertical (y) blend weights.
+		Vertex::Vector ft = t - t.floor();
+
+		Ct[0] = Ct[0] + (Ct[1] - Ct[0]) * ft.x;
+		Ct[2] = Ct[2] + (Ct[3] - Ct[2]) * ft.x;
+		Ct[0] = Ct[0] + (Ct[2] - Ct[0]) * ft.y;
+	}
+	else 
+	{
+		Ct[0] = m_mem.readTexelX(m_context->TEX0.PSM, ituv[0], ituv[2], m_context->TEX0, m_env.TEXA);
+		// Ct[0] = (m_mem.*m_context->ttbl->rt)(ituv[0], ituv[2], m_context->TEX0, m_env.TEXA);
+		// Ct[0] = m_pTexture[(ituv[2] << m_context->TEX0.TW) + ituv[0]];
+	}
+	// 'a' keeps the incoming fragment alpha; for iTFX 0, 2 and 3 it is restored when bTCC is false.
+	Vertex::Scalar a = Cf.a;
+
+	switch(iTFX)
+	{
+	case 0:
+		Cf = (Cf * Ct[0] >> 7);
+		if(!bTCC) Cf.a = a;
+		break;
+	case 1:
+		Cf = Ct[0];
+		break;
+	case 2:
+		Cf = (Cf * Ct[0] >> 7) + Cf.a;
+		Cf.a = !bTCC ? a : (Ct[0].a + a);
+		break;
+	case 3:
+		Cf = (Cf * Ct[0] >> 7) + Cf.a;
+		Cf.a = !bTCC ? a : Ct[0].a;
+		break;
+	default: 
+		__assume(0);
+	}
+
+	Cf.sat();
+}
+
+template <class Vertex>
+void GSRendererSW<Vertex>::SetupTexture()
+{
+	m_mem.SetupCLUT32(m_context->TEX0, m_env.TEXA);
+
+	// Precompute the per-lane wrap/clamp parameters that DrawVertexTFX applies to {u, u + 1, v, v + 1}.
+
+	int tw = 1 << m_context->TEX0.TW;
+	int th = 1 << m_context->TEX0.TH;
+	// WMS 0: and = tw - 1; 1: clamp to [0, tw - 1]; 2: clamp to [MINU, MAXU]; 3: and = MINU, or = MAXU. mask picks the scheme.
+	switch(m_context->CLAMP.WMS)
+	{
+	case 0: m_uv->and[0] = (short)(tw-1); m_uv->or[0] = 0; m_uv->mask[0] = 0xffff; break;
+	case 1: m_uv->min[0] = 0; m_uv->max[0] = (short)(tw-1); m_uv->mask[0] = 0; break;
+	case 2: m_uv->min[0] = (short)m_context->CLAMP.MINU; m_uv->max[0] = (short)m_context->CLAMP.MAXU; m_uv->mask[0] = 0; break;
+	case 3: m_uv->and[0] = (short)m_context->CLAMP.MINU; m_uv->or[0] = (short)m_context->CLAMP.MAXU; m_uv->mask[0] = 0xffff; break;
+	default: __assume(0);
+	}
+	// Lane 1 (u + 1) mirrors lane 0 (u); fields the switch did not write are copied with whatever they held before.
+	m_uv->and[1] = m_uv->and[0];
+	m_uv->or[1] = m_uv->or[0];
+	m_uv->min[1] = m_uv->min[0];
+	m_uv->max[1] = m_uv->max[0];
+	m_uv->mask[1] = m_uv->mask[0];
+	// WMT: the same scheme for lanes 2 and 3, using th and MINV/MAXV.
+	switch(m_context->CLAMP.WMT)
+	{
+	case 0: m_uv->and[2] = (short)(th-1); m_uv->or[2] = 0; m_uv->mask[2] = 0xffff; break;
+	case 1: m_uv->min[2] = 0; m_uv->max[2] = (short)(th-1); m_uv->mask[2] = 0; break;
+	case 2: m_uv->min[2] = (short)m_context->CLAMP.MINV; m_uv->max[2] = (short)m_context->CLAMP.MAXV; m_uv->mask[2] = 0; break;
+	case 3: m_uv->and[2] = (short)m_context->CLAMP.MINV; m_uv->or[2] = (short)m_context->CLAMP.MAXV; m_uv->mask[2] = 0xffff; break;
+	default: __assume(0);
+	}
+
+	m_uv->and[3] = m_uv->and[2];
+	m_uv->or[3] = m_uv->or[2];
+	m_uv->min[3] = m_uv->min[2];
+	m_uv->max[3] = m_uv->max[2];
+	m_uv->mask[3] = m_uv->mask[2];
+}
+
+//
+// GSRendererSWFP
+//
+
+GSRendererSWFP::GSRendererSWFP(BYTE* base, bool mt, void (*irq)(), bool nloophack)
+	: GSRendererSW<GSVertexSWFP>(base, mt, irq, nloophack) // software renderer instantiated for the GSVertexSWFP vertex type
+{
+}
+
+void GSRendererSWFP::VertexKick(bool skip)
+{
+	GSVertexSWFP& v = m_vl.AddTail();
+	// Position: XYZ minus XYOFFSET, scaled by 1/16. z is stored after the scaling, so it is not divided.
+	v.p.x = (int)m_v.XYZ.X - (int)m_context->XYOFFSET.OFX;
+	v.p.y = (int)m_v.XYZ.Y - (int)m_context->XYOFFSET.OFY;
+	v.p *= GSVertexSWFP::Scalar(1.0f / 16);
+	v.p.z = (float)m_v.XYZ.Z;
+	//v.p.z = (float)(m_v.XYZ.Z >> 16);
+	//v.p.q = (float)(m_v.XYZ.Z & 0xffff);
+
+	v.c = (DWORD)m_v.RGBAQ.ai32[0];
+	// v.t.z carries the fog factor F / 255 and is only written when fogging (FGE) is enabled.
+	if(PRIM->FGE)
+	{
+		v.t.z = (float)m_v.FOG.F * (1.0f / 255);
+	}
+	// Texture coordinates: with FST, UV scaled by 1/16 and q = 1; otherwise ST scaled by the texture size and q = RGBAQ.Q.
+	if(PRIM->TME)
+	{
+		if(PRIM->FST)
+		{
+			v.t.x = (float)(int)m_v.UV.U;
+			v.t.y = (float)(int)m_v.UV.V;
+			v.t *= GSVertexSWFP::Scalar(1.0f / 16);
+			v.t.q = 1.0f;
+		}
+		else
+		{
+			v.t.x = m_v.ST.S * (1 << m_context->TEX0.TW);
+			v.t.y = m_v.ST.T * (1 << m_context->TEX0.TH);
+			v.t.q = m_v.RGBAQ.Q;
+		}
+	}
+
+	__super::VertexKick(skip);
+}
+
+#pragma warning(pop)
diff --git a/gsdx10/GSRendererSW.h b/gsdx10/GSRendererSW.h
new file mode 100644
index 0000000..3f4141c
--- /dev/null
+++ b/gsdx10/GSRendererSW.h
@@ -0,0 +1,94 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSRenderer.h"
+#include "GSVertexSW.h"
+
+template <class Vertex> // Vertex must provide a nested Vector type (see DrawVertexTFX)
+class GSRendererSW : public GSRendererT<Vertex>
+{
+protected:
+	GSTexture2D m_texture[2];
+	// NOTE(review): DrawingKick/Draw/Flip are presumably overrides of GSRendererT hooks - confirm against GSRenderer.h.
+	void DrawingKick(bool skip);
+	void Draw();
+	void Flip();
+	// Frame (f) and depth (z) addressing state; presumably maintained by RowInit/RowStep - confirm in the .cpp.
+	DWORD m_faddr_x0, m_faddr;
+	DWORD m_zaddr_x0, m_zaddr;
+	int* m_faddr_ro;
+	int* m_zaddr_ro;
+	int m_fx, m_fy;
+	void RowInit(int x, int y);
+	void RowStep();
+	// Primitive drawing routines (declared here only).
+	void DrawPoint(Vertex* v);
+	void DrawLine(Vertex* v);
+	void DrawTriangle(Vertex* v);
+	void DrawSprite(Vertex* v);
+	bool DrawFilledRect(int left, int top, int right, int bottom, const Vertex& v);
+	// DrawVertex is instantiated per <iZTST, iATST>; m_dv[4][8] presumably holds those instantiations in the same index order - confirm.
+	template <int iZTST, int iATST> 
+	void DrawVertex(const Vertex& v);
+
+	typedef void (GSRendererSW<Vertex>::*DrawVertexPtr)(const Vertex& v);
+	DrawVertexPtr m_dv[4][8], m_pDrawVertex;
+	// Same scheme for DrawVertexTFX: the dimensions of m_dvtfx[4][2][2][4] mirror <iLOD, bLCM, bTCC, iTFX> (presumed index order - confirm).
+	template <int iLOD, bool bLCM, bool bTCC, int iTFX>
+	void DrawVertexTFX(typename Vertex::Vector& Cf, const Vertex& v);
+
+	typedef void (GSRendererSW<Vertex>::*DrawVertexTFXPtr)(typename Vertex::Vector& Cf, const Vertex& v);
+	DrawVertexTFXPtr m_dvtfx[4][2][2][4], m_pDrawVertexTFX;
+
+	void SetupTexture();
+	// uv_wrap_t: min/max and and/or share storage through the union, so only one of the two pairs is meaningful per lane; mask is stored next to them.
+	struct uv_wrap_t {union {struct {short min[8], max[8];}; struct {short and[8], or[8];};}; unsigned short mask[8];}* m_uv;
+	// NOTE(review): how m_scissor/m_clip/m_mask/m_clamp are filled is not visible in this header.
+	CRect m_scissor;
+	BYTE m_clip[65536];
+	BYTE m_mask[65536];
+	BYTE* m_clamp;
+
+public:
+	GSRendererSW(BYTE* base, bool mt, void (*irq)(), bool nloophack);
+	virtual ~GSRendererSW();
+};
+
+class GSRendererSWFP : public GSRendererSW<GSVertexSWFP> // GSRendererSW bound to the GSVertexSWFP vertex type
+{
+protected:
+	void VertexKick(bool skip); // fills a GSVertexSWFP from the current vertex registers (m_v)
+
+public:
+	GSRendererSWFP(BYTE* base, bool mt, void (*irq)(), bool nloophack); // forwards all arguments to GSRendererSW
+};
+/*
+class GSRendererSWFX : public GSRendererSW<GSVertexSWFX>
+{
+protected:
+	void VertexKick(bool skip);
+
+public:
+	GSRendererSWFX();
+};
+*/
\ No newline at end of file
diff --git a/gsdx10/GSSettingsDlg.cpp b/gsdx10/GSSettingsDlg.cpp
new file mode 100644
index 0000000..b00444c
--- /dev/null
+++ b/gsdx10/GSSettingsDlg.cpp
@@ -0,0 +1,238 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSSettingsDlg.h"
+#include <shlobj.h>
+
+GSSetting g_renderers[] = // renderer choices {id, name, note}; the id of the selected entry is stored under "renderer"
+{
+	{0, _T("Direct3D10"), NULL},
+	{1, _T("Software"), NULL},
+	{2, _T("Do not render"), NULL},
+};
+
+GSSetting g_interlace[] = // interlace modes; a non-NULL note is shown in parentheses after the name
+{
+	{0, _T("None"), NULL},
+	{1, _T("Weave tff"), _T("saw-tooth")},
+	{2, _T("Weave bff"), _T("saw-tooth")},
+	{3, _T("Bob tff"), _T("use blend if shaking")},
+	{4, _T("Bob bff"), _T("use blend if shaking")},
+	{5, _T("Blend tff"), _T("slight blur, 1/2 fps")},
+	{6, _T("Blend bff"), _T("slight blur, 1/2 fps")},
+};
+
+GSSetting g_aspectratio[] = // aspect ratio choices; OnInitDialog selects id 1 (4:3) when nothing is stored
+{
+	{0, _T("Stretch"), NULL},
+	{1, _T("4:3"), NULL},
+	{2, _T("16:9"), NULL},
+};
+
+IMPLEMENT_DYNAMIC(GSSettingsDlg, CDialog)
+GSSettingsDlg::GSSettingsDlg(CWnd* pParent /*=NULL*/)
+	: CDialog(GSSettingsDlg::IDD, pParent)
+	, m_filter(1) // same default OnInitDialog uses when no "filter" value is stored
+	, m_nloophack(2) // same default OnInitDialog uses when no "nloophack" value is stored
+	, m_nativeres(FALSE)
+	, m_vsync(FALSE)
+{
+}
+
+GSSettingsDlg::~GSSettingsDlg() // nothing is released explicitly here
+{
+}
+
+void GSSettingsDlg::InitComboBox(CComboBox& combobox, const GSSetting* settings, int count, DWORD sel, DWORD maxid)
+{
+	// Lists every setting with id <= maxid as "name" or "name (note)", keeps the id as item data and selects the one whose id equals sel.
+	for(int n = 0; n < count; n++)
+	{
+		const GSSetting& s = settings[n];
+		if(s.id > maxid) continue;
+		CString label = s.name;
+		if(s.note != NULL) label = label + _T(" (") + s.note + _T(")");
+		int index = combobox.AddString(label);
+		combobox.SetItemData(index, s.id);
+		if(sel == s.id) combobox.SetCurSel(index);
+	}
+}
+
+void GSSettingsDlg::DoDataExchange(CDataExchange* pDX) // binds the dialog controls to their members and exchanges the check box values
+{
+	CDialog::DoDataExchange(pDX);
+	DDX_Control(pDX, IDC_COMBO3, m_resolution);
+	DDX_Control(pDX, IDC_COMBO1, m_renderer);
+	DDX_Control(pDX, IDC_COMBO2, m_interlace);
+	DDX_Control(pDX, IDC_COMBO5, m_aspectratio);
+	DDX_Check(pDX, IDC_CHECK4, m_filter); // int member, default 1
+	DDX_Check(pDX, IDC_CHECK6, m_nloophack);	// int member, default 2 - presumably a tri-state check box; confirm against the dialog resource
+	DDX_Control(pDX, IDC_SPIN1, m_resx);
+	DDX_Control(pDX, IDC_SPIN2, m_resy);
+	DDX_Check(pDX, IDC_CHECK1, m_nativeres); // the box whose click handler is OnBnClickedCheck1
+	DDX_Control(pDX, IDC_EDIT1, m_resxedit);
+	DDX_Control(pDX, IDC_EDIT2, m_resyedit);
+	DDX_Check(pDX, IDC_CHECK2, m_vsync);
+}
+
+BEGIN_MESSAGE_MAP(GSSettingsDlg, CDialog)
+	ON_BN_CLICKED(IDC_CHECK1, &GSSettingsDlg::OnBnClickedCheck1) // IDC_CHECK1 is the check box bound to m_nativeres
+END_MESSAGE_MAP()
+
+// GSSettingsDlg message handlers
+
+BOOL GSSettingsDlg::OnInitDialog() // fills the controls from the "Settings" profile section
+{
+	__super::OnInitDialog();
+
+    CWinApp* pApp = AfxGetApp();
+
+	m_modes.RemoveAll();
+
+	// windowed
+	// (represented by an all-zero DXGI_MODE_DESC; each combo item stores the POSITION of its m_modes entry)
+	DXGI_MODE_DESC  mode;
+	memset(&mode, 0, sizeof(mode));
+	m_modes.AddTail(mode);
+
+	int iItem = m_resolution.AddString(_T("Windowed"));
+	m_resolution.SetItemDataPtr(iItem, m_modes.GetTailPosition());
+	m_resolution.SetCurSel(iItem);
+	// NOTE(review): the disabled block below refers to pD3D, D3DDISPLAYMODE and caps, none of which are declared here, and reads "ModeRefreshRate" while OnOK writes Numerator/Denominator keys.
+	// fullscreen
+/*
+	CComPtr<ID3D10Device> dev;
+
+	if(SUCCEEDED(D3D10CreateDevice(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &dev)))
+	{
+		// DXGI_MODE_DESC
+
+		int ModeWidth = pApp->GetProfileInt(_T("Settings"), _T("ModeWidth"), 0);
+		int ModeHeight = pApp->GetProfileInt(_T("Settings"), _T("ModeHeight"), 0);
+		int ModeRefreshRate = pApp->GetProfileInt(_T("Settings"), _T("ModeRefreshRate"), 0);
+
+		UINT nModes = pD3D->GetAdapterModeCount(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8);
+
+		for(UINT i = 0; i < nModes; i++)
+		{
+			D3DDISPLAYMODE mode;
+
+			if(S_OK == pD3D->EnumAdapterModes(D3DADAPTER_DEFAULT, D3DFMT_X8R8G8B8, i, &mode))
+			{
+				CString str;
+				str.Format(_T("%dx%d %dHz"), mode.Width, mode.Height, mode.RefreshRate);
+				int iItem = m_resolution.AddString(str);
+
+				m_modes.AddTail(mode);
+				m_resolution.SetItemDataPtr(iItem, m_modes.GetTailPosition());
+
+				if(ModeWidth == mode.Width && ModeHeight == mode.Height && ModeRefreshRate == mode.RefreshRate)
+				{
+					m_resolution.SetCurSel(iItem);
+				}
+			}
+		}
+
+		pD3D->GetDeviceCaps(D3DADAPTER_DEFAULT, D3DDEVTYPE_HAL, &caps);
+	}
+*/
+	// Combo boxes: the stored id selects the entry; defaults are 0, 0 and 1 (4:3).
+	InitComboBox(m_renderer, g_renderers, countof(g_renderers), pApp->GetProfileInt(_T("Settings"), _T("renderer"), 0));
+	InitComboBox(m_interlace, g_interlace, countof(g_interlace), pApp->GetProfileInt(_T("Settings"), _T("interlace"), 0));
+	InitComboBox(m_aspectratio, g_aspectratio, countof(g_aspectratio), pApp->GetProfileInt(_T("Settings"), _T("aspectratio"), 1));
+
+	// check boxes and the custom resolution spinners
+
+	m_filter = pApp->GetProfileInt(_T("Settings"), _T("filter"), 1);
+	m_nloophack = pApp->GetProfileInt(_T("Settings"), _T("nloophack"), 2);
+	m_vsync = !!pApp->GetProfileInt(_T("Settings"), _T("vsync"), FALSE);
+
+	m_resx.SetRange(512, 4096);
+	m_resy.SetRange(512, 4096);
+	m_resx.SetPos(pApp->GetProfileInt(_T("Settings"), _T("resx"), 1024));
+	m_resy.SetPos(pApp->GetProfileInt(_T("Settings"), _T("resy"), 1024));
+	m_nativeres = !!pApp->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE);
+	// The custom resolution controls are disabled while native resolution is selected.
+	m_resx.EnableWindow(!m_nativeres);
+	m_resy.EnableWindow(!m_nativeres);
+	m_resxedit.EnableWindow(!m_nativeres);
+	m_resyedit.EnableWindow(!m_nativeres);
+
+	// push the member values loaded above into the controls
+
+	UpdateData(FALSE);
+
+	return TRUE;  // return TRUE unless you set the focus to a control
+	// EXCEPTION: OCX Property Pages should return FALSE
+}
+
+void GSSettingsDlg::OnOK()
+{
+	// Read the controls back into the members, then store every value in the "Settings" profile section.
+	UpdateData();
+
+	CWinApp* app = AfxGetApp();
+	const TCHAR* section = _T("Settings");
+
+	int cur = m_resolution.GetCurSel();
+
+	if(cur >= 0)
+	{
+		// the item data is the POSITION of the matching m_modes entry
+		POSITION pos = (POSITION)m_resolution.GetItemData(cur);
+		const DXGI_MODE_DESC& mode = m_modes.GetAt(pos);
+
+		app->WriteProfileInt(section, _T("ModeWidth"), mode.Width);
+		app->WriteProfileInt(section, _T("ModeHeight"), mode.Height);
+		app->WriteProfileInt(section, _T("ModeRefreshRateNumerator"), mode.RefreshRate.Numerator);
+		app->WriteProfileInt(section, _T("ModeRefreshRateDenominator"), mode.RefreshRate.Denominator);
+	}
+
+	// combo boxes: persist the id attached to the selected item, if there is a selection
+	cur = m_renderer.GetCurSel();
+	if(cur >= 0) app->WriteProfileInt(section, _T("renderer"), (DWORD)m_renderer.GetItemData(cur));
+
+	cur = m_interlace.GetCurSel();
+	if(cur >= 0) app->WriteProfileInt(section, _T("interlace"), (DWORD)m_interlace.GetItemData(cur));
+
+	cur = m_aspectratio.GetCurSel();
+	if(cur >= 0) app->WriteProfileInt(section, _T("aspectratio"), (DWORD)m_aspectratio.GetItemData(cur));
+
+	app->WriteProfileInt(section, _T("filter"), m_filter);
+	app->WriteProfileInt(section, _T("nloophack"), m_nloophack);
+	app->WriteProfileInt(section, _T("vsync"), m_vsync);
+	app->WriteProfileInt(section, _T("resx"), m_resx.GetPos());
+	app->WriteProfileInt(section, _T("resy"), m_resy.GetPos());
+	app->WriteProfileInt(section, _T("nativeres"), m_nativeres);
+
+	__super::OnOK();
+}
+
+void GSSettingsDlg::OnBnClickedCheck1()
+{
+	UpdateData(); // refresh m_nativeres (and the other members) from the controls
+	const BOOL custom = !m_nativeres; // the resolution controls are usable only while native resolution is off
+	m_resx.EnableWindow(custom);
+	m_resy.EnableWindow(custom);
+	m_resxedit.EnableWindow(custom);
+	m_resyedit.EnableWindow(custom);
+}
diff --git a/gsdx10/GSSettingsDlg.h b/gsdx10/GSSettingsDlg.h
new file mode 100644
index 0000000..41d3030
--- /dev/null
+++ b/gsdx10/GSSettingsDlg.h
@@ -0,0 +1,72 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "resource.h"
+#include "afxwin.h"
+#include "afxcmn.h"
+
+struct GSSetting {DWORD id; const TCHAR* name; const TCHAR* note;}; // one combo box entry: stored id, label, optional note (NULL for none)
+// Tables defined in GSSettingsDlg.cpp.
+extern GSSetting g_renderers[]; 
+extern GSSetting g_interlace[];
+extern GSSetting g_aspectratio[];
+
+class GSSettingsDlg : public CDialog // configuration dialog; values are loaded from and saved to the "Settings" profile section
+{
+	DECLARE_DYNAMIC(GSSettingsDlg)
+
+private:
+	CAtlList<DXGI_MODE_DESC> m_modes; // display modes; items of m_resolution store the POSITION of their entry
+	// Adds the entries with id <= maxid to the combo box and selects the one whose id equals sel; the default maxid (~0) filters nothing out.
+	void InitComboBox(CComboBox& combobox, const GSSetting* settings, int count, DWORD sel, DWORD maxid = ~0);
+
+public:
+	GSSettingsDlg(CWnd* pParent = NULL);   // standard constructor
+	virtual ~GSSettingsDlg();
+
+// Dialog Data
+	enum { IDD = IDD_CONFIG };
+	CComboBox m_resolution;
+	CComboBox m_renderer;
+	CComboBox m_interlace;
+	CComboBox m_aspectratio;
+	int m_filter; // check box value kept as int (default 1)
+	int m_nloophack; // check box value kept as int (default 2)
+	CSpinButtonCtrl m_resx;
+	CSpinButtonCtrl m_resy;
+	BOOL m_nativeres; // when set, the custom resolution controls are disabled
+	CEdit m_resxedit;
+	CEdit m_resyedit;
+	BOOL m_vsync;
+
+protected:
+	virtual void DoDataExchange(CDataExchange* pDX);    // DDX/DDV support
+	virtual BOOL OnInitDialog();
+	virtual void OnOK();
+
+	DECLARE_MESSAGE_MAP()
+
+public:
+	afx_msg void OnBnClickedCheck1(); // click handler of the m_nativeres check box
+};
+
diff --git a/gsdx10/GSTexture.cpp b/gsdx10/GSTexture.cpp
new file mode 100644
index 0000000..10b0c0c
--- /dev/null
+++ b/gsdx10/GSTexture.cpp
@@ -0,0 +1,355 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSTextureCache.h"
+#include "GSRendererHW.h"
+
+GSTextureCache::GSTexture::GSTexture(GSTextureCache* tc) 
+	: GSSurface(tc)
+	, m_valid(0, 0, 0, 0) // nothing uploaded yet; Update() grows this rectangle
+	, m_bpp(0)
+	, m_bpp2(0)
+	, m_rendered(false) // set by Create(GSRenderTarget*) and Create(GSDepthStencil*); Update() returns early when it is set
+{
+	memset(m_clut, 0, sizeof(m_clut)); // local copy of the palette that GetTexture compares against and refreshes
+}
+
+bool GSTextureCache::GSTexture::Create()
+{
+	// m_tc->m_renderer->m_perfmon.Put(GSPerfMon::WriteTexture, 1);
+	// Creates an empty texture described by the current context's TEX0/CLAMP; returns false when the device call fails.
+	HRESULT hr;
+
+	m_TEX0 = m_tc->m_renderer->m_context->TEX0;
+	m_CLAMP = m_tc->m_renderer->m_context->CLAMP;
+	// For the paletted formats the format of the CLUT entries (CPSM) decides the texture format.
+	DWORD psm = m_TEX0.PSM;
+
+	switch(psm)
+	{
+	case PSM_PSMT8:
+	case PSM_PSMT8H:
+	case PSM_PSMT4:
+	case PSM_PSMT4HL:
+	case PSM_PSMT4HH:
+		psm = m_TEX0.CPSM;
+		break;
+	}
+
+	DXGI_FORMAT format;
+
+	switch(psm)
+	{
+	default:
+		ASSERT(0); // unexpected format: asserts in debug builds, then falls through to the 32-bit case
+	case PSM_PSMCT32:
+		m_bpp = 32;
+		m_bpp2 = 0;
+		format = DXGI_FORMAT_R8G8B8A8_UNORM;
+		break;
+	case PSM_PSMCT24:
+		m_bpp = 32;
+		m_bpp2 = 1;
+		format = DXGI_FORMAT_R8G8B8A8_UNORM;
+		break;
+	case PSM_PSMCT16:
+	case PSM_PSMCT16S:
+		m_bpp = 16;
+		m_bpp2 = 3; // NOTE(review): Create(GSRenderTarget*) uses 2 for the same formats - confirm the difference is intended
+		format = DXGI_FORMAT_R16_UNORM;
+		break;
+	}
+	// TW/TH hold log2 of the texture width/height.
+	int w = 1 << m_TEX0.TW;
+	int h = 1 << m_TEX0.TH;
+
+	hr = m_tc->m_renderer->m_dev.CreateTexture(m_texture, w, h, format);
+
+	return SUCCEEDED(hr);
+}
+
+bool GSTextureCache::GSTexture::Create(GSRenderTarget* rt)
+{
+	rt->Update();
+	// Builds this texture from the contents of a render target: optional pitch conversion, optional resize, otherwise a plain copy.
+	// m_tc->m_renderer->m_perfmon.Put(GSPerfMon::ConvertRT2T, 1);
+	// Returns false when a device resource cannot be created or for the special case rejected below; callers then discard the texture.
+	HRESULT hr;
+
+	m_scale = rt->m_scale;
+	m_TEX0 = m_tc->m_renderer->m_context->TEX0;
+	m_CLAMP = m_tc->m_renderer->m_context->CLAMP;
+	m_rendered = true;
+
+	int tw = 1 << m_TEX0.TW;
+	int th = 1 << m_TEX0.TH;
+	int tp = (int)m_TEX0.TW << 6; // NOTE(review): shifts TW (log2 width), not TBW - looks like a pitch was meant; confirm before changing, the check below depends on it
+
+	int w = (int)(m_scale.x * tw + 0.5f);
+	int h = (int)(m_scale.y * th + 0.5f);
+
+	// pitch conversion
+
+	if(rt->m_TEX0.TBW != m_TEX0.TBW) // && rt->m_TEX0.PSM == m_TEX0.PSM
+	{
+		// sfex3 uses this trick (bw: 10 -> 5, wraps the right side below the left)
+
+		// ASSERT(rt->m_TEX0.TBW > m_TEX0.TBW); // otherwise scale.x need to be reduced to make the larger texture fit (TODO)
+
+		hr = m_tc->m_renderer->m_dev.CreateRenderTarget(m_texture, rt->m_texture.m_desc.Width, rt->m_texture.m_desc.Height);
+		if(FAILED(hr)) return false; // no destination to copy the blocks into
+		int bw = 64;
+		int bh = m_TEX0.PSM == PSM_PSMCT32 || m_TEX0.PSM == PSM_PSMCT24 ? 32 : 64;
+
+		int sw = (int)rt->m_TEX0.TBW << 6;
+
+		int dw = (int)m_TEX0.TBW << 6;
+		int dh = 1 << m_TEX0.TH;
+
+		for(int dy = 0; dy < dh; dy += bh)
+		{
+			for(int dx = 0; dx < dw; dx += bw)
+			{
+				int o = dy * dw / bh + dx;
+
+				int sx = o % sw;
+				int sy = o / sw;
+
+				D3DXVECTOR4 src, dst;
+
+				src.x = m_scale.x * sx / rt->m_texture.m_desc.Width;
+				src.y = m_scale.y * sy / rt->m_texture.m_desc.Height;
+				src.z = m_scale.x * (sx + bw) / rt->m_texture.m_desc.Width;
+				src.w = m_scale.y * (sy + bh) / rt->m_texture.m_desc.Height;
+
+				dst.x = m_scale.x * dx;
+				dst.y = m_scale.y * dy;
+				dst.z = m_scale.x * (dx + bw);
+				dst.w = m_scale.y * (dy + bh);
+
+				m_tc->m_renderer->m_dev.StretchRect(rt->m_texture, src, m_texture, dst);
+
+				// TODO: this is quite a lot of StretchRect, do it with one Draw
+			}
+		}
+	}
+	else if(tw < tp)
+	{
+		// FIXME: timesplitters blurs the render target by blending itself over a couple of times
+
+		if(tw == 256 && th == 128 && tp == 512 && (m_TEX0.TBP0 == 0 || m_TEX0.TBP0 == 0x00e00))
+		{
+			return false;
+		}
+
+		// TODO
+	}
+
+	// width/height conversion
+
+	if(w != rt->m_texture.m_desc.Width || h != rt->m_texture.m_desc.Height)
+	{
+		D3DXVECTOR4 dst(0, 0, w, h);
+		
+		if(w > rt->m_texture.m_desc.Width) 
+		{
+			float scale = m_scale.x;
+			m_scale.x = (float)rt->m_texture.m_desc.Width / tw;
+			dst.z = (float)rt->m_texture.m_desc.Width * m_scale.x / scale;
+			w = rt->m_texture.m_desc.Width;
+		}
+		
+		if(h > rt->m_texture.m_desc.Height) 
+		{
+			float scale = m_scale.y;
+			m_scale.y = (float)rt->m_texture.m_desc.Height / th;
+			dst.w = (float)rt->m_texture.m_desc.Height * m_scale.y / scale;
+			h = rt->m_texture.m_desc.Height;
+		}
+
+		D3DXVECTOR4 src(0, 0, w, h);
+
+		GSTexture2D* st;
+		GSTexture2D* dt;
+		GSTexture2D tmp;
+
+		if(!m_texture)
+		{
+			st = &rt->m_texture;
+			dt = &m_texture;
+		}
+		else
+		{
+			st = &m_texture;
+			dt = &tmp;
+		}
+
+		hr = m_tc->m_renderer->m_dev.CreateRenderTarget(*dt, w, h);
+		if(FAILED(hr)) return false; // do not copy or stretch into a texture that was never created
+		if(src == dst)
+		{
+			D3D10_BOX box = {0, 0, 0, w, h, 1};
+
+			m_tc->m_renderer->m_dev->CopySubresourceRegion(*dt, 0, 0, 0, 0, *st, 0, &box);
+		}
+		else
+		{
+			src.z /= st->m_desc.Width;
+			src.w /= st->m_desc.Height;
+
+			m_tc->m_renderer->m_dev.StretchRect(*st, src, *dt, dst);
+		}
+
+		if(tmp)
+		{
+			m_tc->m_renderer->m_dev.Recycle(m_texture);
+
+			m_texture = tmp;
+		}
+	}
+
+	if(!m_texture)
+	{
+		hr = m_tc->m_renderer->m_dev.CreateTexture(m_texture, rt->m_texture.m_desc.Width, rt->m_texture.m_desc.Height);
+		if(FAILED(hr)) return false; // nothing to copy the render target into
+		m_tc->m_renderer->m_dev->CopyResource(m_texture, rt->m_texture);
+	}
+
+	switch(m_TEX0.PSM)
+	{
+	case PSM_PSMCT32:
+		m_bpp2 = 0;
+		break;
+	case PSM_PSMCT24:
+		m_bpp2 = 1;
+		break;
+	case PSM_PSMCT16:
+	case PSM_PSMCT16S:
+		m_bpp2 = 2;
+		break;
+	case PSM_PSMT8H:
+		m_bpp2 = 4;
+		hr = m_tc->m_renderer->m_dev.CreateTexture(m_palette, 256, 1, m_TEX0.CPSM == PSM_PSMCT32 ? DXGI_FORMAT_R8G8B8A8_UNORM : DXGI_FORMAT_R16_UNORM); // result not checked: GetTexture copes with a missing palette
+		break;
+	case PSM_PSMT4HL:
+	case PSM_PSMT4HH:
+		ASSERT(0); // TODO
+		break;
+	}
+
+	return true;
+}
+
+bool GSTextureCache::GSTexture::Create(GSDepthStencil* ds)
+{
+	m_rendered = true;
+	// Not implemented yet: ds is unused and the call always reports failure.
+	// TODO
+
+	return false;
+}
+
+void GSTextureCache::GSTexture::Update(GSLocalMemory::readTexture rt)
+{
+	__super::Update();
+	// Uploads the rectangle reported by GetDirtyRect, read through the member function rt; textures created from rendered surfaces are left alone.
+	if(m_rendered)
+	{
+		return;
+	}
+
+	CRect r;
+
+	if(!GetDirtyRect(r))
+	{
+		return;
+	}
+
+	static BYTE* buff = NULL; // shared staging buffer (1024 * 1024 * 4 bytes), allocated on first use and never freed
+	if(buff == NULL) buff = (BYTE*)::_aligned_malloc(1024 * 1024 * 4, 16);
+	if(buff == NULL) return; // allocation failed: the dirty state is left untouched so that a later call can retry
+	int pitch = 1024 * m_bpp >> 3;
+	BYTE* bits = buff + pitch * r.top + (r.left * m_bpp >> 3);
+
+	(m_tc->m_renderer->m_mem.*rt)(r, bits, pitch, m_tc->m_renderer->m_context->TEX0, m_tc->m_renderer->m_env.TEXA, m_tc->m_renderer->m_context->CLAMP);
+
+	D3D10_BOX box = {r.left, r.top, 0, r.right, r.bottom, 1};
+
+	m_tc->m_renderer->m_dev->UpdateSubresource(m_texture, 0, &box, bits, pitch, 0); 
+
+	// m_tc->m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle, r.Width() * r.Height() * m_bpp >> 3);
+
+	CRect r2 = m_valid & r;
+
+	if(!r2.IsRectEmpty())
+	{
+		// m_tc->m_renderer->m_perfmon.Put(GSPerfMon::Unswizzle2, r2.Width() * r2.Height() * m_bpp >> 3);
+	}
+	// The uploaded area becomes valid and the pending dirty list is cleared.
+	m_valid |= r;
+	m_dirty.RemoveAll();
+
+	// m_tc->m_renderer->m_perfmon.Put(GSPerfMon::Texture, r.Width() * r.Height() * m_bpp >> 3);
+}
+
+bool GSTextureCache::GSTexture::GetDirtyRect(CRect& r) // computes the rectangle Update() has to upload; false when there is nothing to do
+{
+	int w = 1 << m_TEX0.TW;
+	int h = 1 << m_TEX0.TH;
+	// Start from the whole texture; MinMaxUV is then given the chance to adjust r (presumably to the sampled range - confirm).
+	r.SetRect(0, 0, w, h);
+
+	m_tc->m_renderer->MinMaxUV(w, h, r);
+	// The dirty rectangle is clipped to the size of the actual D3D texture.
+	CRect dirty = m_dirty.GetDirtyRect(m_TEX0);
+	CRect valid = m_valid;
+
+	dirty &= CRect(0, 0, m_texture.m_desc.Width, m_texture.m_desc.Height);
+	// NOTE(review): IsRectInRect(a, b) presumably tests "a lies inside b", with H/V variants for one axis only - confirm against the helpers.
+	if(IsRectInRect(r, valid))
+	{
+		if(dirty.IsRectEmpty()) return false;
+		else if(IsRectInRect(dirty, r)) r = dirty;
+		else if(IsRectInRect(dirty, valid)) r |= dirty;
+		else r = valid & dirty;
+	}
+	else if(IsRectInRectH(r, valid) && (r.left >= valid.left || r.right <= valid.right))
+	{
+		r.top = valid.top;
+		r.bottom = valid.bottom;
+		if(r.left < valid.left) r.right = valid.left;
+		else /*if(r.right > valid.right)*/ r.left = valid.right;
+	}
+	else if(IsRectInRectV(r, valid) && (r.top >= valid.top || r.bottom <= valid.bottom))
+	{
+		r.left = valid.left;
+		r.right = valid.right;
+		if(r.top < valid.top) r.bottom = valid.top;
+		else /*if(r.bottom > valid.bottom)*/ r.top = valid.bottom;
+	}
+	else
+	{
+		r |= valid;
+	}
+
+	return !r.IsRectEmpty();
+}
diff --git a/gsdx10/GSTexture2D.cpp b/gsdx10/GSTexture2D.cpp
new file mode 100644
index 0000000..81027d5
--- /dev/null
+++ b/gsdx10/GSTexture2D.cpp
@@ -0,0 +1,103 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSTexture2D.h"
+
+GSTexture2D::GSTexture2D() // empty wrapper: no texture, no device
+{
+	memset(&m_desc, 0, sizeof(m_desc)); // an all-zero description makes the Is*() queries return false
+}
+
+GSTexture2D::GSTexture2D(ID3D10Texture2D* texture)
+	: m_texture(texture)
+{
+	// Wraps an existing texture and caches its device and description.
+	ASSERT(m_texture);
+	memset(&m_desc, 0, sizeof(m_desc)); // a NULL texture yields the same empty state as the default constructor
+	if(m_texture) {m_texture->GetDevice(&m_dev); m_texture->GetDesc(&m_desc);}
+}
+
+GSTexture2D::~GSTexture2D() // the CComPtr members release their interfaces on their own
+{
+}
+
+GSTexture2D::operator bool() // true when a texture is attached
+{
+	return !!m_texture;
+}
+
+bool GSTexture2D::IsShaderResource() const // true when m_desc.BindFlags contains D3D10_BIND_SHADER_RESOURCE
+{
+	return (m_desc.BindFlags & D3D10_BIND_SHADER_RESOURCE) != 0;
+}
+
+bool GSTexture2D::IsRenderTarget() const // true when m_desc.BindFlags contains D3D10_BIND_RENDER_TARGET
+{
+	return (m_desc.BindFlags & D3D10_BIND_RENDER_TARGET) != 0;
+}
+
+bool GSTexture2D::IsDepthStencil() const // true when m_desc.BindFlags contains D3D10_BIND_DEPTH_STENCIL
+{
+	return (m_desc.BindFlags & D3D10_BIND_DEPTH_STENCIL) != 0;
+}
+
+ID3D10Texture2D* GSTexture2D::operator->() // direct access to the wrapped texture; NULL when none is attached
+{
+	return m_texture;
+}
+
+GSTexture2D::operator ID3D10Texture2D*() // lets the wrapper be passed where a raw texture pointer is expected
+{
+	return m_texture;
+}
+
+GSTexture2D::operator ID3D10ShaderResourceView*()
+{
+	// The view is created on first use (needs both a device and a texture) and cached in m_srv.
+	if(m_srv) return m_srv;
+
+	if(m_texture && m_dev) m_dev->CreateShaderResourceView(m_texture, NULL, &m_srv);
+
+	return m_srv;
+}
+
+GSTexture2D::operator ID3D10RenderTargetView*()
+{
+	// The view is created on first use and cached in m_rtv; debug builds also assert that a device is attached.
+	ASSERT(m_dev);
+
+	if(m_rtv) return m_rtv;
+
+	if(m_texture && m_dev) m_dev->CreateRenderTargetView(m_texture, NULL, &m_rtv);
+
+	return m_rtv;
+}
+
+GSTexture2D::operator ID3D10DepthStencilView*()
+{
+	// The view is created on first use (needs both a device and a texture) and cached in m_dsv.
+	if(m_dsv) return m_dsv;
+
+	if(m_texture && m_dev) m_dev->CreateDepthStencilView(m_texture, NULL, &m_dsv);
+
+	return m_dsv;
+}
diff --git a/gsdx10/GSTexture2D.h b/gsdx10/GSTexture2D.h
new file mode 100644
index 0000000..1551c95
--- /dev/null
+++ b/gsdx10/GSTexture2D.h
@@ -0,0 +1,51 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+class GSTexture2D // wrapper around an ID3D10Texture2D that creates and caches its views on demand
+{
+	CComPtr<ID3D10ShaderResourceView> m_srv; // filled by the corresponding conversion operator on first use
+	CComPtr<ID3D10RenderTargetView> m_rtv;
+	CComPtr<ID3D10DepthStencilView> m_dsv;
+
+public:
+	CComPtr<ID3D10Device> m_dev; // device the views are created with
+	CComPtr<ID3D10Texture2D> m_texture;
+	D3D10_TEXTURE2D_DESC m_desc; // cached description; zeroed by the default constructor
+
+	GSTexture2D();
+	explicit GSTexture2D(ID3D10Texture2D* texture);
+	virtual ~GSTexture2D();
+
+	operator bool(); // true when a texture is attached
+	// Queries on m_desc.BindFlags.
+	bool IsShaderResource() const;
+	bool IsRenderTarget() const;
+	bool IsDepthStencil() const;
+
+	ID3D10Texture2D* operator->();
+	// Conversions to the raw texture and to its lazily created views.
+	operator ID3D10Texture2D*();
+	operator ID3D10ShaderResourceView*();
+	operator ID3D10RenderTargetView*();
+	operator ID3D10DepthStencilView*();
+};
diff --git a/gsdx10/GSTextureCache.cpp b/gsdx10/GSTextureCache.cpp
new file mode 100644
index 0000000..6b1a62d
--- /dev/null
+++ b/gsdx10/GSTextureCache.cpp
@@ -0,0 +1,578 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "StdAfx.h"
+#include "GSTextureCache.h"
+#include "GSRendererHW.h"
+#include "resource.h"
+
+GSTextureCache::GSTextureCache(GSRendererHW* renderer)
+	: m_renderer(renderer)
+{
+	m_nativeres = !!AfxGetApp()->GetProfileInt(_T("Settings"), _T("nativeres"), FALSE); // same setting the configuration dialog writes; off by default
+}
+
+GSTextureCache::~GSTextureCache() // deletes everything still held by the cache
+{
+	RemoveAll();
+}
+
+void GSTextureCache::RemoveAll() // empties the three lists, deleting each surface as it is removed
+{
+	while(m_rt.GetCount()) delete m_rt.RemoveHead();
+	while(m_ds.GetCount()) delete m_ds.RemoveHead();
+	while(m_tex.GetCount()) delete m_tex.RemoveHead();
+}
+
+GSTextureCache::GSRenderTarget* GSTextureCache::GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb)
+{
+	POSITION pos = m_tex.GetHeadPosition();
+	// Returns the render target for TEX0.TBP0, creating a w x h one if needed (NULL on failure). First, cached textures sharing bits with it are dropped.
+	while(pos)
+	{
+		POSITION cur = pos;
+
+		GSTexture* t = m_tex.GetNext(pos);
+
+		if(HasSharedBits(TEX0.TBP0, TEX0.PSM, t->m_TEX0.TBP0, t->m_TEX0.PSM))
+		{
+			m_tex.RemoveAt(cur);
+
+			delete t;
+		}
+	}
+
+	GSRenderTarget* rt = NULL;
+	// 1) exact base pointer match: moved to the head of the list; its TEX0 is only overwritten when fb is false
+	if(rt == NULL)
+	{
+		for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos))
+		{
+			GSRenderTarget* rt2 = m_rt.GetAt(pos);
+
+			if(rt2->m_TEX0.TBP0 == TEX0.TBP0)
+			{
+				m_rt.MoveToHead(pos);
+
+				rt = rt2;
+
+				if(!fb) rt->m_TEX0 = TEX0;
+
+				rt->Update();
+
+				break;
+			}
+		}
+	}
+	// 2) fb lookups only: fall back to a nearby target
+	if(rt == NULL && fb)
+	{
+		// HACK: try to find something close to the base pointer
+		// (among targets whose base lies at most 0xe00 below TEX0.TBP0, the one with the highest base wins)
+		for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos))
+		{
+			GSRenderTarget* rt2 = m_rt.GetAt(pos);
+
+			if(rt2->m_TEX0.TBP0 <= TEX0.TBP0 && TEX0.TBP0 < rt2->m_TEX0.TBP0 + 0xe00 && (!rt || rt2->m_TEX0.TBP0 >= rt->m_TEX0.TBP0))
+			{
+				rt = rt2;
+			}
+		}
+
+		if(rt)
+		{
+			rt->Update();
+		}
+	}
+	// 3) nothing suitable: create a new target and cache it at the head of the list
+	if(rt == NULL)
+	{
+		rt = new GSRenderTarget(this);
+
+		rt->m_TEX0 = TEX0;
+
+		if(!rt->Create(w, h))
+		{
+			delete rt;
+
+			return NULL;
+		}
+
+		m_rt.AddHead(rt);
+	}
+	// Unless native resolution is selected, the scale relates w/h to the frame position plus the buffer width (TBW * 64) or the display height.
+	if(!m_nativeres)
+	{
+		rt->m_scale.x = (float)w / (m_renderer->GetFramePos().cx + rt->m_TEX0.TBW * 64);
+		rt->m_scale.y = (float)h / (m_renderer->GetFramePos().cy + m_renderer->GetDisplaySize().cy);
+	}
+	// Only lookups with fb == false mark the target as used.
+	if(!fb)
+	{
+		rt->m_used = true;
+	}
+
+	return rt;
+}
+
+GSTextureCache::GSDepthStencil* GSTextureCache::GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h)
+{
+	// Returns the cached depth-stencil surface based at TEX0.TBP0, creating a w x h one on a miss; NULL if creation fails.
+	// Any cached texture sharing bits with the requested buffer is dropped first.
+
+	for(POSITION next = m_tex.GetHeadPosition(); next; )
+	{
+		POSITION here = next;
+
+		GSTexture* tex = m_tex.GetNext(next);
+
+		if(!HasSharedBits(TEX0.TBP0, TEX0.PSM, tex->m_TEX0.TBP0, tex->m_TEX0.PSM))
+		{
+			continue;
+		}
+
+		m_tex.RemoveAt(here);
+
+		delete tex;
+	}
+
+	// Look for an existing surface with the same base pointer.
+
+	POSITION hit = m_ds.GetHeadPosition();
+
+	while(hit && m_ds.GetAt(hit)->m_TEX0.TBP0 != TEX0.TBP0)
+	{
+		m_ds.GetNext(hit);
+	}
+
+	GSDepthStencil* found = NULL;
+
+	if(hit)
+	{
+		// cache hit: move it to the front of the list and refresh its TEX0
+
+		found = m_ds.GetAt(hit);
+
+		m_ds.MoveToHead(hit);
+
+		found->m_TEX0 = TEX0;
+		found->Update();
+	}
+	else
+	{
+		// cache miss: create a new surface and put it at the front
+
+		found = new GSDepthStencil(this);
+		found->m_TEX0 = TEX0;
+
+		if(!found->Create(w, h))
+		{
+			delete found;
+			return NULL;
+		}
+
+		m_ds.AddHead(found);
+	}
+
+	// The surface is flagged as used unless ZBUF.ZMSK is set in the current context.
+	if(!m_renderer->m_context->ZBUF.ZMSK) found->m_used = true;
+
+	return found;
+}
+
+GSTextureCache::GSTexture* GSTextureCache::GetTexture()
+{
+	const GIFRegTEX0& TEX0 = m_renderer->m_context->TEX0;
+	const GIFRegCLAMP& CLAMP = m_renderer->m_context->CLAMP;
+	// Returns an up-to-date texture for the current context's TEX0/CLAMP: a cached one, one converted from a render target, or a new one. NULL on failure.
+	DWORD clut[256];
+	// pal is the palette entry count of the format (0 for non-paletted formats); clut is only filled and read when pal > 0.
+	int pal = GSLocalMemory::m_psm[TEX0.PSM].pal;
+
+	if(pal > 0)
+	{
+		m_renderer->m_mem.SetupCLUT(TEX0);
+		m_renderer->m_mem.CopyCLUT32(clut, pal);
+/*
+		POSITION pos = m_tex.GetHeadPosition();
+
+		while(pos)
+		{
+			POSITION cur = pos;
+
+			GSSurface* s = m_tex.GetNext(pos);
+
+			if(s->m_TEX0.TBP0 == TEX0.CBP)
+			{
+				m_tex.RemoveAt(cur);
+
+				delete s;
+			}
+		}
+
+		pos = m_rt.GetHeadPosition();
+
+		while(pos)
+		{
+			POSITION cur = pos;
+
+			GSSurface* s = m_rt.GetNext(pos);
+
+			if(s->m_TEX0.TBP0 == TEX0.CBP)
+			{
+				m_rt.RemoveAt(cur);
+
+				delete s;
+			}
+		}
+
+		pos = m_ds.GetHeadPosition();
+
+		while(pos)
+		{
+			POSITION cur = pos;
+
+			GSSurface* s = m_ds.GetNext(pos);
+
+			if(s->m_TEX0.TBP0 == TEX0.CBP)
+			{
+				m_ds.RemoveAt(cur);
+
+				delete s;
+			}
+		}*/
+	}
+	// 1) cached texture: same PSM/TBW/TW/TH, compatible CLAMP (identical when either side uses mode 3) and, for paletted formats, same CPSM and palette contents
+	GSTexture* t = NULL;
+
+	for(POSITION pos = m_tex.GetHeadPosition(); pos; m_tex.GetNext(pos))
+	{
+		t = m_tex.GetAt(pos);
+
+		if(HasSharedBits(t->m_TEX0.TBP0, t->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM))
+		{
+			if(TEX0.PSM == t->m_TEX0.PSM && TEX0.TBW == t->m_TEX0.TBW
+			&& TEX0.TW == t->m_TEX0.TW && TEX0.TH == t->m_TEX0.TH
+			&& (CLAMP.WMS != 3 && t->m_CLAMP.WMS != 3 && CLAMP.WMT != 3 && t->m_CLAMP.WMT != 3 || CLAMP.i64 == t->m_CLAMP.i64)
+			&& (pal == 0 || TEX0.CPSM == t->m_TEX0.CPSM && !memcmp(t->m_clut, clut, pal * sizeof(clut[0]))))
+			{
+				m_tex.MoveToHead(pos);
+
+				break;
+			}
+		}
+
+		t = NULL;
+	}
+	// 2) the first render target without pending dirty rectangles that shares bits with the texture is converted
+	if(t == NULL)
+	{
+		for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos))
+		{
+			GSRenderTarget* rt = m_rt.GetAt(pos);
+
+			if(rt->m_dirty.IsEmpty() && HasSharedBits(rt->m_TEX0.TBP0, rt->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM))
+			{
+				t = new GSTexture(this);
+
+				if(!t->Create(rt))
+				{
+					delete t;
+
+					return NULL;
+				}
+
+				m_tex.AddHead(t);
+
+				break;
+			}
+		}
+	}
+	// 3) same for used depth-stencil surfaces; NOTE(review): Create(GSDepthStencil*) currently always fails, so a match here makes this function return NULL
+	if(t == NULL)
+	{
+		for(POSITION pos = m_ds.GetHeadPosition(); pos; m_ds.GetNext(pos))
+		{
+			GSDepthStencil* ds = m_ds.GetAt(pos);
+
+			if(ds->m_dirty.IsEmpty() && ds->m_used && HasSharedBits(ds->m_TEX0.TBP0, ds->m_TEX0.PSM, TEX0.TBP0, TEX0.PSM))
+			{
+				t = new GSTexture(this);
+
+				if(!t->Create(ds))
+				{
+					delete t;
+
+					return NULL;
+				}
+
+				m_tex.AddHead(t);
+
+				break;
+			}
+		}
+	}
+	// 4) otherwise a fresh texture is created and cached at the head of the list
+	if(t == NULL)
+	{
+		t = new GSTexture(this);
+
+		if(!t->Create())
+		{
+			delete t;
+
+			return NULL;
+		}
+
+		m_tex.AddHead(t);
+	}
+	// Palette: refresh the texture's copy; when it owns a palette texture, that is re-uploaded only if at least one entry changed.
+	if(pal > 0)
+	{
+		int size = pal * sizeof(clut[0]);
+
+		if(t->m_palette)
+		{
+			// TODO: sse2
+
+			DWORD sum = 0;
+			
+			for(int i = 0; i < pal; i++)
+			{
+				sum |= t->m_clut[i] ^ clut[i];
+
+				t->m_clut[i] = clut[i];
+			}
+
+			if(sum != 0) 
+			{
+				D3D10_BOX box = {0, 0, 0, pal, 1, 1}; // NOTE(review): entries are 32-bit here, but Create(GSRenderTarget*) may create m_palette as R16_UNORM - confirm
+
+				m_renderer->m_dev->UpdateSubresource(t->m_palette, 0, &box, t->m_clut, size, 0);
+
+				// m_renderer->m_perfmon.Put(GSPerfMon::Texture, size);
+			}
+		}
+		else
+		{
+			memcpy(t->m_clut, clut, size);
+		}
+	}
+	// Upload whatever part of the texture is missing or dirty (a no-op for textures created from rendered surfaces).
+	t->Update(&GSLocalMemory::ReadTextureNP);
+
+	return t;
+}
+
+void GSTextureCache::InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r)
+{ // Marks dirty, or evicts, every cached surface that overlaps the destination buffer (DBP/DPSM) of BITBLTBUF; r is the affected rect.
+	POSITION pos = m_tex.GetHeadPosition();
+
+	while(pos)
+	{
+		POSITION cur = pos; // GetNext advances pos, so keep the current node for RemoveAt
+
+		GSTexture* t = m_tex.GetNext(pos);
+
+		if(HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, t->m_TEX0.TBP0, t->m_TEX0.PSM))
+		{
+			if(BITBLTBUF.DBW == t->m_TEX0.TBW) // same buffer width: r can be recorded as a dirty rect of the texture
+			{
+				t->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, r));
+			}
+			else // different buffer width: drop the texture from the cache
+			{
+				m_tex.RemoveAt(cur);
+
+				delete t;
+			}
+		}
+	}
+
+	pos = m_rt.GetHeadPosition();
+
+	while(pos)
+	{
+		POSITION cur = pos;
+
+		GSRenderTarget* rt = m_rt.GetNext(pos);
+
+		if(HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, rt->m_TEX0.TBP0, rt->m_TEX0.PSM))
+		{
+			if(BITBLTBUF.DPSM == PSM_PSMCT32 // only these eight color/Z formats keep the target alive
+			|| BITBLTBUF.DPSM == PSM_PSMCT24 
+			|| BITBLTBUF.DPSM == PSM_PSMCT16 
+			|| BITBLTBUF.DPSM == PSM_PSMCT16S
+			|| BITBLTBUF.DPSM == PSM_PSMZ32 
+			|| BITBLTBUF.DPSM == PSM_PSMZ24 
+			|| BITBLTBUF.DPSM == PSM_PSMZ16 
+			|| BITBLTBUF.DPSM == PSM_PSMZ16S)
+			{
+				rt->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, r));
+				rt->m_TEX0.TBW = BITBLTBUF.DBW; // the target adopts the buffer width of the write
+			}
+			else // any other destination format: evict the target
+			{
+				m_rt.RemoveAt(cur);
+
+				delete rt;
+
+				continue; // rt is gone, the offset check below must not touch it
+			}
+		}
+
+		if(HasSharedBits(BITBLTBUF.DPSM, rt->m_TEX0.PSM) && BITBLTBUF.DBP < rt->m_TEX0.TBP0) // destination buffer starts before this target
+		{
+			DWORD rowsize = BITBLTBUF.DBW * 8192; // NOTE(review): looks like the size of one row of pages (8192 per page) - confirm units
+			DWORD offset = (rt->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256; // distance between the two base pointers, scaled by 256 (presumably same units as rowsize - confirm)
+
+			if(rowsize > 0 && offset % rowsize == 0) // rowsize > 0 guards the division; the target must start on a whole-row boundary
+			{
+				int y = m_renderer->m_mem.m_psm[BITBLTBUF.DPSM].pgs.cy * offset / rowsize; // pgs.cy times the number of whole rows in front of the target
+
+				if(r.top >= y)
+				{
+					// TODO: do not add this rect above too
+					rt->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, CRect(r.left, r.top - y, r.right, r.bottom - y))); // r shifted up by y
+					rt->m_TEX0.TBW = BITBLTBUF.DBW;
+					continue;
+				}
+			}
+		}
+	}
+
+	// copypaste for ds
+
+	pos = m_ds.GetHeadPosition();
+
+	while(pos)
+	{
+		POSITION cur = pos;
+
+		GSDepthStencil* ds = m_ds.GetNext(pos);
+
+		if(HasSharedBits(BITBLTBUF.DBP, BITBLTBUF.DPSM, ds->m_TEX0.TBP0, ds->m_TEX0.PSM))
+		{
+			if(BITBLTBUF.DPSM == PSM_PSMCT32 // same format list as in the render-target loop above
+			|| BITBLTBUF.DPSM == PSM_PSMCT24 
+			|| BITBLTBUF.DPSM == PSM_PSMCT16 
+			|| BITBLTBUF.DPSM == PSM_PSMCT16S
+			|| BITBLTBUF.DPSM == PSM_PSMZ32 
+			|| BITBLTBUF.DPSM == PSM_PSMZ24 
+			|| BITBLTBUF.DPSM == PSM_PSMZ16 
+			|| BITBLTBUF.DPSM == PSM_PSMZ16S)
+			{
+				ds->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, r));
+				ds->m_TEX0.TBW = BITBLTBUF.DBW;
+			}
+			else
+			{
+				m_ds.RemoveAt(cur);
+
+				delete ds;
+
+				continue; // ds is gone, skip the offset check
+			}
+		}
+
+		if(HasSharedBits(BITBLTBUF.DPSM, ds->m_TEX0.PSM) && BITBLTBUF.DBP < ds->m_TEX0.TBP0) // same row-offset handling as for render targets
+		{
+			DWORD rowsize = BITBLTBUF.DBW * 8192;
+			DWORD offset = (ds->m_TEX0.TBP0 - BITBLTBUF.DBP) * 256;
+
+			if(rowsize > 0 && offset % rowsize == 0)
+			{
+				int y = m_renderer->m_mem.m_psm[BITBLTBUF.DPSM].pgs.cy * offset / rowsize;
+
+				if(r.top >= y)
+				{
+					// TODO: do not add this rect above too
+					ds->m_dirty.AddTail(GSDirtyRect(BITBLTBUF.DPSM, CRect(r.left, r.top - y, r.right, r.bottom - y)));
+					ds->m_TEX0.TBW = BITBLTBUF.DBW;
+					continue;
+				}
+			}
+		}
+	}
+}
+
+void GSTextureCache::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r)
+{
+	// Calls Read(r) on the first render target overlapping the source buffer (SBP/SPSM); later matches are ignored.
+
+	for(POSITION pos = m_rt.GetHeadPosition(); pos; m_rt.GetNext(pos))
+	{
+		GSRenderTarget* target = m_rt.GetAt(pos);
+
+		if(!HasSharedBits(BITBLTBUF.SBP, BITBLTBUF.SPSM, target->m_TEX0.TBP0, target->m_TEX0.PSM)) continue;
+
+		target->Read(r);
+
+		return;
+	}
+}
+
+void GSTextureCache::IncAge()
+{ // Ages every cached surface by one step and destroys the ones that reached their limit.
+	RecycleByAge(m_tex, 2); // textures are dropped once their age reaches 2
+	RecycleByAge(m_rt); // render targets use RecycleByAge's default maxage (10 in the declaration)
+	RecycleByAge(m_ds); // depth stencils likewise
+}
+
+template<class T> void GSTextureCache::RecycleByAge(CAtlList<T*>& l, int maxage)
+{
+	// Increments m_age of every entry in l, then unlinks and deletes the entries whose age reached maxage.
+
+	for(POSITION next = l.GetHeadPosition(); next != NULL; )
+	{
+		const POSITION here = next; // GetNext advances 'next', so remember this node for RemoveAt
+
+		T* entry = l.GetNext(next);
+
+		entry->m_age++;
+
+		if(entry->m_age < maxage) continue;
+
+		l.RemoveAt(here);
+		delete entry;
+	}
+}
+
+//
+
+GSTextureCache::GSSurface::GSSurface(GSTextureCache* tc)
+	: m_tc(tc) // owning cache, needed by the destructor to reach the device
+	, m_scale(1, 1)
+	, m_age(0)
+{
+	m_TEX0.TBP0 = ~0; // all bits set; NOTE(review): presumably a "no address yet" marker so it matches no real buffer - confirm
+}
+
+GSTextureCache::GSSurface::~GSSurface()
+{ // Hands both textures to the device's Recycle() instead of simply dropping them.
+	m_tc->m_renderer->m_dev.Recycle(m_texture);
+	m_tc->m_renderer->m_dev.Recycle(m_palette);
+}
+
+void GSTextureCache::GSSurface::Update()
+{ // Marks the surface as freshly used.
+	m_age = 0; // RecycleByAge counts up from here again
+}
diff --git a/gsdx10/GSTextureCache.h b/gsdx10/GSTextureCache.h
new file mode 100644
index 0000000..64ae419
--- /dev/null
+++ b/gsdx10/GSTextureCache.h
@@ -0,0 +1,116 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSTexture2D.h"
+
+class GSRendererHW;
+
+class GSTextureCache
+{
+public:
+	class GSSurface // common base of everything the cache stores
+	{
+	protected:
+		GSTextureCache* m_tc; // owning cache; the destructor reaches the device through it
+
+	public:
+		GSTexture2D m_texture;
+		GSTexture2D m_palette; // GetTexture uploads the CLUT into it when it is set
+		GSScale m_scale; // constructed as (1, 1)
+		int m_age; // incremented by RecycleByAge, reset to 0 by Update()
+		GSDirtyRectList m_dirty; // rects recorded by InvalidateTexture
+		GIFRegTEX0 m_TEX0; // register values the surface is matched against on lookup
+
+		explicit GSSurface(GSTextureCache* tc);
+		virtual ~GSSurface();
+
+		void Update(); // non-virtual; each derived class declares its own Update
+	};
+
+	class GSRenderTarget : public GSSurface
+	{
+	public:
+		bool m_used;
+
+		explicit GSRenderTarget(GSTextureCache* tc);
+
+		bool Create(int w, int h);
+		void Update();
+		void Read(CRect r); // called by InvalidateLocalMem for the first overlapping target
+	};
+
+	class GSDepthStencil : public GSSurface
+	{
+	public:
+		bool m_used; // set by GetDepthStencil when ZBUF.ZMSK is clear; required by GetTexture before reusing it as a texture
+
+		explicit GSDepthStencil(GSTextureCache* tc);
+
+		bool Create(int w, int h);
+		void Update();
+	};
+
+	class GSTexture : public GSSurface
+	{
+		bool GetDirtyRect(CRect& r);
+
+	public:
+		GIFRegCLAMP m_CLAMP; // compared against the current CLAMP register on lookup
+		DWORD m_clut[256]; // * CLUT entries last seen for this texture; compared and refreshed by GetTexture
+		CRect m_valid;
+		int m_bpp;
+		int m_bpp2;
+		bool m_rendered;
+
+		explicit GSTexture(GSTextureCache* tc);
+
+		bool Create(); // a new texture, not backed by an existing surface
+		bool Create(GSRenderTarget* rt); // texture built from a render target
+		bool Create(GSDepthStencil* ds); // texture built from a depth stencil
+		void Update(GSLocalMemory::readTexture rt);
+	};
+
+protected:
+	GSRendererHW* m_renderer;
+	CAtlList<GSRenderTarget*> m_rt; // the three lists own their entries (entries are deleted on removal)
+	CAtlList<GSDepthStencil*> m_ds;
+	CAtlList<GSTexture*> m_tex; // hits are moved to the head by GetTexture
+	bool m_nativeres;
+
+	template<class T> void RecycleByAge(CAtlList<T*>& l, int maxage = 10); // ages all entries of l, deletes those reaching maxage
+
+public:
+	GSTextureCache(GSRendererHW* renderer);
+	virtual ~GSTextureCache();
+
+	void RemoveAll();
+
+	GSRenderTarget* GetRenderTarget(const GIFRegTEX0& TEX0, int w, int h, bool fb = false);
+	GSDepthStencil* GetDepthStencil(const GIFRegTEX0& TEX0, int w, int h); // may return NULL
+	GSTexture* GetTexture(); // uses the renderer's current TEX0/CLAMP; may return NULL
+
+	void InvalidateTexture(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r); // acts on the destination side (DBP/DPSM/DBW)
+	void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const CRect& r); // acts on the source side (SBP/SPSM)
+
+	void IncAge(); // ages all three lists
+};
diff --git a/gsdx10/GSTextureFX.cpp b/gsdx10/GSTextureFX.cpp
new file mode 100644
index 0000000..b6d30d4
--- /dev/null
+++ b/gsdx10/GSTextureFX.cpp
@@ -0,0 +1,473 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSTextureFX.h"
+#include "resource.h"
+
+GSTextureFX::GSTextureFX()
+	: m_dev(NULL) // set by Create()
+{
+	memset(m_vb_max, 0, sizeof(m_vb_max));
+	m_vb_cur = 0;
+	memset(&m_vs_cb_cache, 0, sizeof(m_vs_cb_cache)); // the caches start zeroed; SetupVS/SetupPS upload whenever the new constants differ from them
+	memset(&m_ps_cb_cache, 0, sizeof(m_ps_cb_cache));
+}
+
+bool GSTextureFX::Create(GSDevice* dev)
+{ // Compiles the vertex shader with its input layout and creates both constant buffers; returns false on the first failure.
+	m_dev = dev;
+
+	// shaders
+
+	HRESULT hr;
+
+	D3D10_INPUT_ELEMENT_DESC il[] = // byte offsets 0/16/20/24 must match the field layout of GSVertexHW
+	{
+		{"POSITION", 0, DXGI_FORMAT_R32G32B32A32_FLOAT, 0, 0, D3D10_INPUT_PER_VERTEX_DATA, 0},
+		{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 16, D3D10_INPUT_PER_VERTEX_DATA, 0},
+		{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 20, D3D10_INPUT_PER_VERTEX_DATA, 0},
+		{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 24, D3D10_INPUT_PER_VERTEX_DATA, 0},
+	};
+
+	hr = m_dev->CompileShader(&m_vs, IDR_TFX_FX, "vs_main", il, countof(il), &m_il);
+
+	if(FAILED(hr)) return false;
+
+	// buffers
+
+	D3D10_BUFFER_DESC bd;
+
+	memset(&bd, 0, sizeof(bd));
+
+	bd.ByteWidth = sizeof(VSConstantBuffer); // vertex shader constants
+	bd.Usage = D3D10_USAGE_DEFAULT;
+	bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER;
+
+	hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_vs_cb);
+
+	if(FAILED(hr)) return false;
+
+	memset(&bd, 0, sizeof(bd));
+
+	bd.ByteWidth = sizeof(PSConstantBuffer); // pixel shader constants
+	bd.Usage = D3D10_USAGE_DEFAULT;
+	bd.BindFlags = D3D10_BIND_CONSTANT_BUFFER;
+
+	hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_ps_cb);
+
+	if(FAILED(hr)) return false;
+
+	return true;
+}
+
+bool GSTextureFX::SetupIA(const GSVertexHW* vertices, UINT count, D3D10_PRIMITIVE_TOPOLOGY prim)
+{ // Makes sure the current vertex buffer can hold count vertices, then passes buffer, vertices, layout and topology to the device; false if the buffer cannot be created.
+	HRESULT hr;
+
+	int i = m_vb_cur;
+
+	m_vb_cur = (m_vb_cur + 1) % countof(m_vb); // rotate through the buffers (m_vb is declared with a single element)
+
+	if(m_vb[i])
+	{
+		if(m_vb_max[i] < max(count, 100000)) // existing buffer is too small for this batch
+		{
+			(*m_dev)->Flush(); // NOTE(review): presumably lets pending work using the old buffer go out before it is released - confirm
+
+			m_vb[i] = NULL; // release it; it is recreated right below
+		}
+	}
+
+	if(!m_vb[i])
+	{
+		m_vb_max[i] = max(count, 100000); // capacity in vertices, never less than 100000
+
+		D3D10_BUFFER_DESC bd;
+
+		memset(&bd, 0, sizeof(bd));
+
+		bd.Usage = D3D10_USAGE_DEFAULT;
+		bd.ByteWidth = m_vb_max[i] * sizeof(GSVertexHW);
+		bd.BindFlags = D3D10_BIND_VERTEX_BUFFER;
+		bd.CPUAccessFlags = 0;
+		bd.MiscFlags = 0;
+
+		hr = (*m_dev)->CreateBuffer(&bd, NULL, &m_vb[i]);
+
+		if(FAILED(hr)) return false;
+	}
+
+	m_dev->IASet(m_vb[i], count, vertices, m_il, prim);
+
+	return true;
+}
+
+bool GSTextureFX::SetupVS(const VSConstantBuffer* cb)
+{
+	// m_vs_cb_cache mirrors the last upload; identical constants skip the UpdateSubresource call.
+	const bool stale = memcmp(&m_vs_cb_cache, cb, sizeof(m_vs_cb_cache)) != 0;
+
+	if(stale)
+	{
+		(*m_dev)->UpdateSubresource(m_vs_cb, 0, NULL, cb, 0, 0);
+		memcpy(&m_vs_cb_cache, cb, sizeof(m_vs_cb_cache));
+	}
+	m_dev->VSSet(m_vs, m_vs_cb);
+	return true;
+}
+
+bool GSTextureFX::SetupGS(GSSelector sel)
+{ // Binds the geometry shader variant for sel (compiled on first use, then cached in m_gs), or none when the condition below says it is not needed.
+	HRESULT hr;
+
+	CComPtr<ID3D10GeometryShader> gs; // stays NULL when no geometry shader is needed or compilation fails
+
+	if(sel.prim > 0 && (sel.iip == 0 || sel.prim == 3)) // geometry shader works in every case, but not needed
+	{
+		if(!(gs = m_gs.Lookup(sel))) // Lookup yields a NULL pointer for an unknown key
+		{
+			CStringA str[2];
+
+			str[0].Format("%d", sel.iip);
+			str[1].Format("%d", sel.prim);
+
+			D3D10_SHADER_MACRO macro[] = // the selector fields become preprocessor defines of the shader source
+			{
+				{"IIP", str[0]},
+				{"PRIM", str[1]},
+				{NULL, NULL},
+			};
+
+			hr = m_dev->CompileShader(&gs, IDR_TFX_FX, "gs_main", macro);
+
+			ASSERT(SUCCEEDED(hr));
+
+			if(SUCCEEDED(hr)) m_gs.Add(sel, gs); // CSimpleMap::Add always appends: caching a failed (NULL) result would add one more dead entry on every call
+		}
+	}
+
+	m_dev->GSSet(gs);
+
+	return true;
+}
+
+bool GSTextureFX::SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, ID3D10ShaderResourceView* srv, ID3D10ShaderResourceView* pal)
+{
+	// Sets up the pixel-shader stage: constants, texture/palette views, then shader and sampler via UpdatePS.
+	const bool stale = memcmp(&m_ps_cb_cache, cb, sizeof(m_ps_cb_cache)) != 0;
+
+	if(stale)
+	{
+		// m_ps_cb_cache mirrors the last upload, so unchanged constants skip the UpdateSubresource call.
+		(*m_dev)->UpdateSubresource(m_ps_cb, 0, NULL, cb, 0, 0);
+		memcpy(&m_ps_cb_cache, cb, sizeof(m_ps_cb_cache));
+	}
+
+	(*m_dev)->PSSetConstantBuffers(0, 1, &m_ps_cb.p);
+	m_dev->PSSetShaderResources(srv, pal);
+	UpdatePS(sel, ssel);
+	return true;
+}
+
+void GSTextureFX::UpdatePS(PSSelector sel, PSSamplerSelector ssel)
+{ // Binds the pixel shader variant for sel and the sampler state for ssel; both are created on first use and cached.
+	HRESULT hr;
+
+	CComPtr<ID3D10PixelShader> ps;
+
+	if(!(ps = m_ps.Lookup(sel))) // Lookup yields a NULL pointer for an unknown key
+	{
+		CStringA str[12];
+
+		str[0].Format("%d", sel.fst);
+		str[1].Format("%d", sel.clamp);
+		str[2].Format("%d", sel.bpp);
+		str[3].Format("%d", sel.aem);
+		str[4].Format("%d", sel.tfx);
+		str[5].Format("%d", sel.tcc);
+		str[6].Format("%d", sel.ate);
+		str[7].Format("%d", sel.atst);
+		str[8].Format("%d", sel.fog);
+		str[9].Format("%d", sel.clr1);
+		str[10].Format("%d", sel.fba);
+		str[11].Format("%d", sel.aout);
+
+		D3D10_SHADER_MACRO macro[] = // every selector field becomes a preprocessor define of the shader source
+		{
+			{"FST", str[0]},
+			{"CLAMP", str[1]},
+			{"BPP", str[2]},
+			{"AEM", str[3]},
+			{"TFX", str[4]},
+			{"TCC", str[5]},
+			{"ATE", str[6]},
+			{"ATST", str[7]},
+			{"FOG", str[8]},
+			{"CLR1", str[9]},
+			{"FBA", str[10]},
+			{"AOUT", str[11]},
+			{NULL, NULL},
+		};
+
+		hr = m_dev->CompileShader(&ps, IDR_TFX_FX, "ps_main", macro);
+
+		ASSERT(SUCCEEDED(hr));
+
+		if(SUCCEEDED(hr)) m_ps.Add(sel, ps); // CSimpleMap::Add always appends: caching a failed (NULL) result would add one more dead entry on every call
+	}
+
+	CComPtr<ID3D10SamplerState> ss; // stays NULL when sel.tfx == 4
+
+	if(sel.tfx != 4) // NOTE(review): tfx 4 presumably means "no texture", so no sampler is needed - confirm
+	{
+		if(sel.bpp >= 3) ssel.min = ssel.mag = 0; // force point sampling for these bpp values (done before the lookup so the cache key reflects it)
+
+		if(!(ss = m_ps_ss.Lookup(ssel)))
+		{
+			D3D10_SAMPLER_DESC sd;
+
+			memset(&sd, 0, sizeof(sd));
+
+			sd.AddressU = ssel.tau ? D3D10_TEXTURE_ADDRESS_WRAP : D3D10_TEXTURE_ADDRESS_CLAMP;
+			sd.AddressV = ssel.tav ? D3D10_TEXTURE_ADDRESS_WRAP : D3D10_TEXTURE_ADDRESS_CLAMP;
+			sd.AddressW = D3D10_TEXTURE_ADDRESS_CLAMP;
+
+			sd.Filter = D3D10_ENCODE_BASIC_FILTER( // min and mag are selectable, mip is always point, no comparison filter
+				(ssel.min ? D3D10_FILTER_TYPE_LINEAR : D3D10_FILTER_TYPE_POINT),
+				(ssel.mag ? D3D10_FILTER_TYPE_LINEAR : D3D10_FILTER_TYPE_POINT),
+				D3D10_FILTER_TYPE_POINT,
+				false);
+
+			sd.MaxLOD = FLT_MAX;
+			sd.MaxAnisotropy = 16; 
+			sd.ComparisonFunc = D3D10_COMPARISON_NEVER;
+
+			hr = (*m_dev)->CreateSamplerState(&sd, &ss);
+
+			if(SUCCEEDED(hr)) m_ps_ss.Add(ssel, ss); // same rule as for shaders: only successfully created states are cached
+		}
+	}
+
+	m_dev->PSSet(ps, ss);
+}
+
+void GSTextureFX::SetupRS(UINT w, UINT h, const RECT& scissor)
+{ // Forwards the size and scissor rect to the device's RSSet.
+	m_dev->RSSet(w, h, &scissor);
+}
+
+void GSTextureFX::SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv)
+{ // Sets the depth-stencil/blend state first (UpdateOM), then the render target and depth-stencil views.
+	UpdateOM(dssel, bsel, bf);
+
+	m_dev->OMSetRenderTargets(rtv, dsv);
+}
+
+void GSTextureFX::UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf)
+{ // Passes the depth-stencil state for dssel and the blend state for bsel (both created on first use and cached) to the device together with bf.
+	HRESULT hr;
+
+	CComPtr<ID3D10DepthStencilState> dss;
+
+	if(!(dss = m_om_dss.Lookup(dssel))) // Lookup yields a NULL pointer for an unknown key
+	{
+		D3D10_DEPTH_STENCIL_DESC dsd;
+
+		memset(&dsd, 0, sizeof(dsd));
+
+		if(dssel.date) // stencil test: pass only where (stencil & 1) equals the reference value, never modify the stencil
+		{
+			dsd.StencilEnable = true;
+			dsd.StencilReadMask = 1;
+			dsd.StencilWriteMask = 1;
+			dsd.FrontFace.StencilFunc = D3D10_COMPARISON_EQUAL;
+			dsd.FrontFace.StencilPassOp = D3D10_STENCIL_OP_KEEP;
+			dsd.FrontFace.StencilFailOp = D3D10_STENCIL_OP_KEEP;
+			dsd.FrontFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP;
+			dsd.BackFace.StencilFunc = D3D10_COMPARISON_EQUAL;
+			dsd.BackFace.StencilPassOp = D3D10_STENCIL_OP_KEEP;
+			dsd.BackFace.StencilFailOp = D3D10_STENCIL_OP_KEEP;
+			dsd.BackFace.StencilDepthFailOp = D3D10_STENCIL_OP_KEEP;
+		}
+
+		if(!(dssel.zte && dssel.ztst == 1 && !dssel.zwe)) // test ALWAYS with writes off does nothing, so depth stays disabled (dsd is zeroed)
+		{
+			static const D3D10_COMPARISON_FUNC ztst[] = // indexed by the 2-bit dssel.ztst
+			{
+				D3D10_COMPARISON_NEVER, 
+				D3D10_COMPARISON_ALWAYS, 
+				D3D10_COMPARISON_GREATER_EQUAL, 
+				D3D10_COMPARISON_GREATER
+			};
+
+			dsd.DepthEnable = dssel.zte;
+			dsd.DepthWriteMask = dssel.zwe ? D3D10_DEPTH_WRITE_MASK_ALL : D3D10_DEPTH_WRITE_MASK_ZERO;
+			dsd.DepthFunc = ztst[dssel.ztst];
+		}
+
+		hr = (*m_dev)->CreateDepthStencilState(&dsd, &dss);
+
+		if(SUCCEEDED(hr)) m_om_dss.Add(dssel, dss); // CSimpleMap::Add always appends: caching a failed (NULL) result would add one more dead entry on every call
+	}
+
+	CComPtr<ID3D10BlendState> bs;
+
+	if(!(bs = m_om_bs.Lookup(bsel)))
+	{
+		D3D10_BLEND_DESC bd;
+
+		memset(&bd, 0, sizeof(bd));
+
+		bd.BlendEnable[0] = bsel.abe;
+
+		if(bsel.abe)
+		{
+			// (A:Cs/Cd/0 - B:Cs/Cd/0) * C:As/Ad/FIX + D:Cs/Cd/0
+
+			static const struct {int bogus; D3D10_BLEND_OP op; D3D10_BLEND src, dst;} map[3*3*3*3] = // row index is the base-3 number "abcd"; result = Cs*src <op> Cd*dst
+			{
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 0000: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 0001: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 0002: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 0010: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 0011: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 0012: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 0020: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 0021: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 0022: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{1, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA},		// * 0100: (Cs - Cd)*As + Cs ==> Cs*(As + 1) - Cd*As
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_INV_SRC1_ALPHA},		// 0101: (Cs - Cd)*As + Cd ==> Cs*As + Cd*(1 - As)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA},		// 0102: (Cs - Cd)*As + 0 ==> Cs*As - Cd*As
+				{1, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA},		// * 0110: (Cs - Cd)*Ad + Cs ==> Cs*(Ad + 1) - Cd*Ad
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_INV_DEST_ALPHA},		// 0111: (Cs - Cd)*Ad + Cd ==> Cs*Ad + Cd*(1 - Ad)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA},		// 0112: (Cs - Cd)*Ad + 0 ==> Cs*Ad - Cd*Ad (was OP_ADD, contradicting the formula and rows 0102/0122)
+				{1, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},	// * 0120: (Cs - Cd)*F + Cs ==> Cs*(F + 1) - Cd*F
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_INV_BLEND_FACTOR},	// 0121: (Cs - Cd)*F + Cd ==> Cs*F + Cd*(1 - F)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},	// 0122: (Cs - Cd)*F + 0 ==> Cs*F - Cd*F
+				{1, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ZERO},					// * 0200: (Cs - 0)*As + Cs ==> Cs*(As + 1)
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ONE},					// 0201: (Cs - 0)*As + Cd ==> Cs*As + Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ZERO},					// 0202: (Cs - 0)*As + 0 ==> Cs*As
+				{1, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ZERO},					// * 0210: (Cs - 0)*Ad + Cs ==> Cs*(Ad + 1) (src named SRC1_ALPHA before; no effect, the bogus path overrides src)
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ONE},					// 0211: (Cs - 0)*Ad + Cd ==> Cs*Ad + Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ZERO},					// 0212: (Cs - 0)*Ad + 0 ==> Cs*Ad
+				{1, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ZERO},				// * 0220: (Cs - 0)*F + Cs ==> Cs*(F + 1)
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ONE},					// 0221: (Cs - 0)*F + Cd ==> Cs*F + Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ZERO},				// 0222: (Cs - 0)*F + 0 ==> Cs*F
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA},		// 1000: (Cd - Cs)*As + Cs ==> Cd*As + Cs*(1 - As)
+				{1, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA},	// * 1001: (Cd - Cs)*As + Cd ==> Cd*(As + 1) - Cs*As
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_SRC1_ALPHA},	// 1002: (Cd - Cs)*As + 0 ==> Cd*As - Cs*As
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA},		// 1010: (Cd - Cs)*Ad + Cs ==> Cd*Ad + Cs*(1 - Ad)
+				{1, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA},	// * 1011: (Cd - Cs)*Ad + Cd ==> Cd*(Ad + 1) - Cs*Ad
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_DEST_ALPHA},	// 1012: (Cd - Cs)*Ad + 0 ==> Cd*Ad - Cs*Ad
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},	// 1020: (Cd - Cs)*F + Cs ==> Cd*F + Cs*(1 - F)
+				{1, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},// * 1021: (Cd - Cs)*F + Cd ==> Cd*(F + 1) - Cs*F
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_BLEND_FACTOR},// 1022: (Cd - Cs)*F + 0 ==> Cd*F - Cs*F
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 1100: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 1101: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 1102: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 1110: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 1111: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 1112: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 1120: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 1121: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 1122: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_SRC1_ALPHA},					// 1200: (Cd - 0)*As + Cs ==> Cs + Cd*As
+				{2, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_COLOR, D3D10_BLEND_SRC1_ALPHA},			// ** 1201: (Cd - 0)*As + Cd ==> Cd*(1 + As)  // ffxii main menu background glow effect
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_SRC1_ALPHA},					// 1202: (Cd - 0)*As + 0 ==> Cd*As
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_DEST_ALPHA},					// 1210: (Cd - 0)*Ad + Cs ==> Cs + Cd*Ad
+				{2, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_COLOR, D3D10_BLEND_DEST_ALPHA},			// ** 1211: (Cd - 0)*Ad + Cd ==> Cd*(1 + Ad)
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_DEST_ALPHA},					// 1212: (Cd - 0)*Ad + 0 ==> Cd*Ad
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_BLEND_FACTOR},					// 1220: (Cd - 0)*F + Cs ==> Cs + Cd*F
+				{2, D3D10_BLEND_OP_ADD, D3D10_BLEND_DEST_COLOR, D3D10_BLEND_BLEND_FACTOR},			// ** 1221: (Cd - 0)*F + Cd ==> Cd*(1 + F)
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_BLEND_FACTOR},				// 1222: (Cd - 0)*F + 0 ==> Cd*F
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_SRC1_ALPHA, D3D10_BLEND_ZERO},				// 2000: (0 - Cs)*As + Cs ==> Cs*(1 - As)
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ONE},			// 2001: (0 - Cs)*As + Cd ==> Cd - Cs*As
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_SRC1_ALPHA, D3D10_BLEND_ZERO},			// 2002: (0 - Cs)*As + 0 ==> 0 - Cs*As
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_DEST_ALPHA, D3D10_BLEND_ZERO},				// 2010: (0 - Cs)*Ad + Cs ==> Cs*(1 - Ad)
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ONE},			// 2011: (0 - Cs)*Ad + Cd ==> Cd - Cs*Ad
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_DEST_ALPHA, D3D10_BLEND_ZERO},			// 2012: (0 - Cs)*Ad + 0 ==> 0 - Cs*Ad
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_INV_BLEND_FACTOR, D3D10_BLEND_ZERO},			// 2020: (0 - Cs)*F + Cs ==> Cs*(1 - F)
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ONE},		// 2021: (0 - Cs)*F + Cd ==> Cd - Cs*F
+				{0, D3D10_BLEND_OP_REV_SUBTRACT, D3D10_BLEND_BLEND_FACTOR, D3D10_BLEND_ZERO},		// 2022: (0 - Cs)*F + 0 ==> 0 - Cs*F
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ONE, D3D10_BLEND_SRC1_ALPHA},				// 2100: (0 - Cd)*As + Cs ==> Cs - Cd*As
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_INV_SRC1_ALPHA},				// 2101: (0 - Cd)*As + Cd ==> Cd*(1 - As)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ZERO, D3D10_BLEND_SRC1_ALPHA},				// 2102: (0 - Cd)*As + 0 ==> 0 - Cd*As
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ONE, D3D10_BLEND_DEST_ALPHA},				// 2110: (0 - Cd)*Ad + Cs ==> Cs - Cd*Ad
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_INV_DEST_ALPHA},				// 2111: (0 - Cd)*Ad + Cd ==> Cd*(1 - Ad)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ZERO, D3D10_BLEND_DEST_ALPHA},				// 2112: (0 - Cd)*Ad + 0 ==> 0 - Cd*Ad (src was ONE, which added a Cs term; row 2102 uses ZERO)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ONE, D3D10_BLEND_BLEND_FACTOR},			// 2120: (0 - Cd)*F + Cs ==> Cs - Cd*F
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_INV_BLEND_FACTOR},			// 2121: (0 - Cd)*F + Cd ==> Cd*(1 - F)
+				{0, D3D10_BLEND_OP_SUBTRACT, D3D10_BLEND_ZERO, D3D10_BLEND_BLEND_FACTOR},			// 2122: (0 - Cd)*F + 0 ==> 0 - Cd*F (src was ONE, which added a Cs term; row 2102 uses ZERO)
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 2200: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 2201: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 2202: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 2210: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 2211: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 2212: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ONE, D3D10_BLEND_ZERO},							// 2220: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cs ==> Cs
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ONE},							// 2221: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + Cd ==> Cd
+				{0, D3D10_BLEND_OP_ADD, D3D10_BLEND_ZERO, D3D10_BLEND_ZERO},						// 2222: (Cs/Cd/0 - Cs/Cd/0)*As/Ad/F + 0 ==> 0
+			};
+
+			// bogus: 0100, 0110, 0120, 0200, 0210, 0220, 1001, 1011, 1021
+
+			// tricky: 1201, 1211, 1221
+			//
+			// Source.rgb = float3(1, 1, 1);
+			// 1201 Cd*(1 + As) => Source * Dest color + Dest * Source1 alpha
+			// 1211 Cd*(1 + Ad) => Source * Dest color + Dest * Dest alpha
+			// 1221 Cd*(1 + F) => Source * Dest color + Dest * Factor
+
+			int i = (((bsel.a & 3) * 3 + (bsel.b & 3)) * 3 + (bsel.c & 3)) * 3 + (bsel.d & 3); // NOTE(review): a field value of 3 can push i past map[] once ASSERT compiles out - confirm callers never pass 3
+
+			ASSERT(bsel.a != 3);
+			ASSERT(bsel.b != 3);
+			ASSERT(bsel.c != 3);
+			ASSERT(bsel.d != 3);
+
+			bd.BlendOp = map[i].op;
+			bd.SrcBlend = map[i].src;
+			bd.DestBlend = map[i].dst;
+			bd.BlendOpAlpha = D3D10_BLEND_OP_ADD; // alpha channel: always plain source alpha
+			bd.SrcBlendAlpha = D3D10_BLEND_ONE;
+			bd.DestBlendAlpha = D3D10_BLEND_ZERO;
+
+			if(map[i].bogus == 1) // formula needs a coefficient of (X + 1), which no blend factor provides
+			{
+				ASSERT(0);
+
+				(bsel.a == 0 ? bd.SrcBlend : bd.DestBlend) = D3D10_BLEND_ONE; // approximate the (X + 1) coefficient with 1
+			}
+		}
+
+		if(bsel.wr) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_RED;
+		if(bsel.wg) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_GREEN;
+		if(bsel.wb) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_BLUE;
+		if(bsel.wa) bd.RenderTargetWriteMask[0] |= D3D10_COLOR_WRITE_ENABLE_ALPHA;
+
+		hr = (*m_dev)->CreateBlendState(&bd, &bs);
+
+		if(SUCCEEDED(hr)) m_om_bs.Add(bsel, bs); // only successfully created states are cached (see the depth-stencil case above)
+	}
+
+	m_dev->OMSet(dss, 1, bs, bf); // NOTE(review): the literal 1 is presumably the stencil reference matching the mask used above - confirm in GSDevice::OMSet
+}
diff --git a/gsdx10/GSTextureFX.h b/gsdx10/GSTextureFX.h
new file mode 100644
index 0000000..602d27e
--- /dev/null
+++ b/gsdx10/GSTextureFX.h
@@ -0,0 +1,175 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#include "GSVertexHW.h"
+#include "GSDevice.h"
+
+class GSTextureFX
+{
+public:
+	#pragma pack(push, 1)
+
+	struct VSConstantBuffer // uploaded as a whole by SetupVS
+	{
+		D3DXVECTOR4 VertexScale;
+		D3DXVECTOR4 VertexOffset;
+		D3DXVECTOR2 TextureScale;
+		float _pad[2];
+	};
+
+	union GSSelector // chooses the geometry shader variant; see SetupGS
+	{
+		struct
+		{
+			DWORD iip:1;
+			DWORD prim:2;
+		};
+
+		DWORD dw;
+
+		operator DWORD() {return dw & 0x7;} // 3 bits = total width of the fields above; the result is the key into m_gs
+	};
+
+	struct PSConstantBuffer // uploaded as a whole by SetupPS
+	{
+		D3DXVECTOR4 FogColor;
+		D3DXVECTOR2 ClampMin;
+		D3DXVECTOR2 ClampMax;
+		float TA0;
+		float TA1;
+		float AREF;
+		float _pad[1];
+		D3DXVECTOR2 WH;
+		D3DXVECTOR2 rWrH;
+		D3DXVECTOR2 rWZ;
+		D3DXVECTOR2 ZrH;
+	};
+
+	union PSSelector // chooses the pixel shader variant; each field becomes a shader macro in UpdatePS
+	{
+		struct
+		{
+			DWORD fst:1;
+			DWORD clamp:1;
+			DWORD bpp:3;
+			DWORD aem:1;
+			DWORD tfx:3;
+			DWORD tcc:1;
+			DWORD ate:1;
+			DWORD atst:3;
+			DWORD fog:1;
+			DWORD clr1:1;
+			DWORD fba:1;
+			DWORD aout:1;
+		};
+
+		DWORD dw;
+
+		operator DWORD() {return dw & 0x3ffff;} // 18 bits = total width of the fields above; key into m_ps
+	};
+
+	union PSSamplerSelector // chooses the sampler state; see UpdatePS
+	{
+		struct
+		{
+			DWORD tau:1; // 1 = WRAP, 0 = CLAMP for AddressU
+			DWORD tav:1; // 1 = WRAP, 0 = CLAMP for AddressV
+			DWORD min:1; // 1 = linear, 0 = point minification
+			DWORD mag:1; // 1 = linear, 0 = point magnification
+		};
+
+		DWORD dw;
+
+		operator DWORD() {return dw & 0xf;} // 4 bits; key into m_ps_ss
+	};
+
+	union OMDepthStencilSelector // chooses the depth-stencil state; see UpdateOM
+	{
+		struct
+		{
+			DWORD zte:1; // depth test enable
+			DWORD ztst:2; // index into the comparison table in UpdateOM
+			DWORD zwe:1; // depth write enable
+			DWORD date:1; // enables the stencil test set up in UpdateOM
+		};
+
+		DWORD dw;
+
+		operator DWORD() {return dw & 0x1f;} // 5 bits; key into m_om_dss
+	};
+
+	union OMBlendSelector // chooses the blend state; see UpdateOM
+	{
+		struct
+		{
+			DWORD abe:1; // blending enable
+			DWORD a:2; // a, b, c, d select the terms of (A - B) * C + D
+			DWORD b:2;
+			DWORD c:2;
+			DWORD d:2;
+			DWORD wr:1; // per-channel write enables
+			DWORD wg:1;
+			DWORD wb:1;
+			DWORD wa:1;
+		};
+
+		DWORD dw;
+
+		operator DWORD() {return dw & 0x1fff;} // 13 bits; key into m_om_bs
+	};
+
+	#pragma pack(pop)
+
+private:
+	GSDevice* m_dev; // set by Create(), not owned
+	CComPtr<ID3D10InputLayout> m_il;
+	CComPtr<ID3D10VertexShader> m_vs;
+	CComPtr<ID3D10Buffer> m_vs_cb;
+	CSimpleMap<DWORD, CComPtr<ID3D10GeometryShader> > m_gs; // the maps cache objects by masked selector value
+	CSimpleMap<DWORD, CComPtr<ID3D10PixelShader> > m_ps;
+	CComPtr<ID3D10Buffer> m_ps_cb;
+	CSimpleMap<DWORD, CComPtr<ID3D10SamplerState> > m_ps_ss;
+	CSimpleMap<DWORD, CComPtr<ID3D10DepthStencilState> > m_om_dss;	
+	CSimpleMap<DWORD, CComPtr<ID3D10BlendState> > m_om_bs;	
+
+	CComPtr<ID3D10Buffer> m_vb[1]; // vertex buffers cycled by SetupIA (currently a single one)
+	int m_vb_max[1]; // capacity of each buffer, in vertices
+	int m_vb_cur; // index of the buffer the next SetupIA call uses
+
+	VSConstantBuffer m_vs_cb_cache; // copies of the last uploaded constants, used to skip redundant uploads
+	PSConstantBuffer m_ps_cb_cache;
+	
+public:
+	GSTextureFX();
+
+	bool Create(GSDevice* dev); // must succeed before any Setup* call (they dereference m_dev)
+	
+	bool SetupIA(const GSVertexHW* vertices, UINT count, D3D10_PRIMITIVE_TOPOLOGY prim);
+	bool SetupVS(const VSConstantBuffer* cb);
+	bool SetupGS(GSSelector sel);
+	bool SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel, ID3D10ShaderResourceView* srv, ID3D10ShaderResourceView* pal);
+	void UpdatePS(PSSelector sel, PSSamplerSelector ssel); // shader + sampler only; called by SetupPS
+	void SetupRS(UINT w, UINT h, const RECT& scissor);
+	void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf, ID3D10RenderTargetView* rtv, ID3D10DepthStencilView* dsv);
+	void UpdateOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, float bf); // states only; called by SetupOM
+};
diff --git a/gsdx10/GSVertexHW.h b/gsdx10/GSVertexHW.h
new file mode 100644
index 0000000..4f9f455
--- /dev/null
+++ b/gsdx10/GSVertexHW.h
@@ -0,0 +1,40 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+__declspec(align(16)) union GSVertexHW // 16-byte aligned vertex: three overlapping views of the same storage
+{
+	struct // field view: 4 floats + colour DWORD + DWORD f + 2 floats
+	{
+		float x, y, z, w;
+		union {struct {BYTE r, g, b, a;}; DWORD c;}; // colour as four bytes r,g,b,a or as one DWORD c
+		DWORD f;
+		float u, v;
+	};
+	
+	struct {__m128i m128i[2];}; // integer SSE view of the same storage
+	struct {__m128 m128[2];}; // float SSE view of the same storage
+	// SSE2/x64 builds copy a vertex as two 128-bit moves; the source is const so const vertices can be assigned from as well
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+	GSVertexHW& operator = (const GSVertexHW& v) {m128i[0] = v.m128i[0]; m128i[1] = v.m128i[1]; return *this;}
+#endif
+};
diff --git a/gsdx10/GSVertexSW.h b/gsdx10/GSVertexSW.h
new file mode 100644
index 0000000..5ea65d2
--- /dev/null
+++ b/gsdx10/GSVertexSW.h
@@ -0,0 +1,315 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+//
+// GSVertexSWFP
+//
+
+__declspec(align(16)) union GSVertexSWFP
+{
+	class __declspec(novtable) Scalar
+	{
+		float m_val; // the wrapped single-precision value
+		// (private by default; everything below is the public surface)
+	public:
+		Scalar() {} // leaves the value uninitialised
+		explicit Scalar(float f) : m_val(f) {}
+		explicit Scalar(int i) : m_val((float)i) {}
+		// accessors
+		float GetValue() const {return m_val;}
+		void SetValue(int i) {m_val = (float)i;}
+		// in-place helpers: clamp to [0, 255], reciprocal, absolute value
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+		void sat() {__m128 lo = _mm_max_ss(_mm_set_ss(m_val), _mm_setzero_ps()); _mm_store_ss(&m_val, _mm_min_ss(lo, _mm_set_ss(255)));}
+		void rcp() {__m128 r = _mm_rcp_ss(_mm_set_ss(m_val)); _mm_store_ss(&m_val, r);}
+#else
+		void sat() {if(m_val < 0) m_val = 0; else if(m_val > 255) m_val = 255;}
+		void rcp() {m_val = 1.0f / m_val;}
+#endif
+		void abs() {m_val = fabs(m_val);}
+		// floor/ceil, each as a Scalar (_s) or as an int (_i); ceil is computed as -floor(-x)
+		Scalar floor_s() const {return Scalar(floor(m_val));}
+		int floor_i() const {return (int)floor(m_val);}
+
+		Scalar ceil_s() const {return Scalar(-floor(-m_val));}
+		int ceil_i() const {return -(int)floor(-m_val);}
+		// assignment from raw numbers (returns void, so these cannot be chained)
+		void operator = (float f) {m_val = f;}
+		void operator = (int i) {m_val = (float)i;}
+		// implicit conversions; the int one truncates with a C cast
+		operator float() const {return m_val;}
+		operator int() const {return (int)m_val;}
+		// compound arithmetic
+		void operator += (const Scalar& rhs) {m_val += rhs.m_val;}
+		void operator -= (const Scalar& rhs) {m_val -= rhs.m_val;}
+		void operator *= (const Scalar& rhs) {m_val *= rhs.m_val;}
+		void operator /= (const Scalar& rhs) {m_val /= rhs.m_val;}
+		// Scalar (op) Scalar
+		friend Scalar operator + (const Scalar& a, const Scalar& b) {return Scalar(a.m_val + b.m_val);}
+		friend Scalar operator - (const Scalar& a, const Scalar& b) {return Scalar(a.m_val - b.m_val);}
+		friend Scalar operator * (const Scalar& a, const Scalar& b) {return Scalar(a.m_val * b.m_val);}
+		friend Scalar operator / (const Scalar& a, const Scalar& b) {return Scalar(a.m_val / b.m_val);}
+		// Scalar (op) int
+		friend Scalar operator + (const Scalar& a, int n) {return Scalar(a.m_val + n);}
+		friend Scalar operator - (const Scalar& a, int n) {return Scalar(a.m_val - n);}
+		friend Scalar operator * (const Scalar& a, int n) {return Scalar(a.m_val * n);}
+		friend Scalar operator / (const Scalar& a, int n) {return Scalar(a.m_val / n);}
+		// "shifts" scale by a power of two: multiply / divide by (1<<n)
+		friend Scalar operator << (const Scalar& a, int n) {return Scalar(a.m_val * (1<<n));}
+		friend Scalar operator >> (const Scalar& a, int n) {return Scalar(a.m_val / (1<<n));}
+		// comparisons (only ==, <=, < and > are provided)
+		friend bool operator == (const Scalar& a, const Scalar& b) {return a.m_val == b.m_val;}
+		friend bool operator <= (const Scalar& a, const Scalar& b) {return a.m_val <= b.m_val;}
+		friend bool operator < (const Scalar& a, const Scalar& b) {return a.m_val < b.m_val;}
+		friend bool operator > (const Scalar& a, const Scalar& b) {return a.m_val > b.m_val;}
+	};
+
+	__declspec(align(16)) class __declspec(novtable) Vector // four Scalars viewed as x,y,z,q / r,g,b,a / v[4] / c[4], plus one __m128 in SSE2/x64 builds
+	{
+	public:
+		union
+		{
+			union {struct {Scalar x, y, z, q;}; struct {Scalar r, g, b, a;};};
+			union {struct {Scalar v[4];}; struct {Scalar c[4];};};
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+			union {__m128 xyzq; __m128 rgba;};
+#endif
+		};
+
+		Vector() {} // leaves all four components uninitialised
+		Vector(const Vector& v) {*this = v;}
+		Vector(Scalar s) {*this = s;} // broadcasts s to all four components
+		Vector(Scalar s0, Scalar s1, Scalar s2, Scalar s3) {x = s0; y = s1; z = s2; q = s3;}
+		explicit Vector(DWORD dw) {*this = dw;} // unpacks four bytes, lowest byte into x
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+		Vector(__m128 f0123) {*this = f0123;}
+#endif
+
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+
+		void operator = (const Vector& v) {xyzq = v.xyzq;}
+		void operator = (Scalar s) {xyzq = _mm_set1_ps(s);}
+
+		void operator = (__m128 f0123) {xyzq = f0123;}
+		operator __m128() const {return xyzq;}
+		// DWORD <-> Vector: bytes are zero-extended on the way in; on the way out values are truncated, then saturated by the pack instructions
+		void operator = (DWORD dw) {__m128i zero = _mm_setzero_si128(); xyzq = _mm_cvtepi32_ps(_mm_unpacklo_epi16(_mm_unpacklo_epi8(_mm_cvtsi32_si128(dw), zero), zero));}
+		operator DWORD() const {__m128i r0 = _mm_cvttps_epi32(xyzq); r0 = _mm_packs_epi32(r0, r0); r0 = _mm_packus_epi16(r0, r0); return (DWORD)_mm_cvtsi128_si32(r0);}
+		operator UINT64() const {__m128i r0 = _mm_cvttps_epi32(xyzq); r0 = _mm_packs_epi32(r0, r0); return *(UINT64*)&r0;}
+
+		void sat() {xyzq = _mm_min_ps(_mm_max_ps(xyzq, _mm_setzero_ps()), _mm_set1_ps(255));}
+		void rcp() {xyzq = _mm_rcp_ps(xyzq);}
+		// floor without a float->int round trip: add and subtract a signed 2^23 (0x4b000000) to round, then subtract 1.0f (0x3f800000) where that rounded up
+		Vector floor()
+		{
+			const __m128i _80000000 = _mm_set1_epi32(0x80000000);
+			const __m128i _4b000000 = _mm_set1_epi32(0x4b000000);
+			const __m128i _3f800000 = _mm_set1_epi32(0x3f800000);
+
+			__m128 sign = _mm_and_ps(xyzq, *(__m128*)&_80000000);
+			__m128 r0 = _mm_or_ps(sign, *(__m128*)&_4b000000);
+			__m128 r1 = _mm_sub_ps(_mm_add_ps(xyzq, r0), r0);
+			__m128 r2 = _mm_sub_ps(r1, xyzq);
+			__m128 r3 = _mm_and_ps(_mm_cmpnle_ps(r2, sign), *(__m128*)&_3f800000);
+			__m128 r4 = _mm_sub_ps(r1, r3);
+			return r4;
+		}
+
+		void operator += (const Vector& v) {xyzq = _mm_add_ps(xyzq, v);}
+		void operator -= (const Vector& v) {xyzq = _mm_sub_ps(xyzq, v);}
+		void operator *= (const Vector& v) {xyzq = _mm_mul_ps(xyzq, v);}
+		void operator /= (const Vector& v) {xyzq = _mm_div_ps(xyzq, v);}
+
+#else
+
+		void operator = (const Vector& v) {x = v.x; y = v.y; z = v.z; q = v.q;}
+		void operator = (Scalar s) {x = y = z = q = s;}
+
+		void operator = (DWORD dw)
+		{
+			x = Scalar((int)((dw>>0)&0xff));
+			y = Scalar((int)((dw>>8)&0xff));
+			z = Scalar((int)((dw>>16)&0xff));
+			q = Scalar((int)((dw>>24)&0xff));
+		}
+
+		operator DWORD() const // masks each component to 8 bits (wraps, unlike the saturating SSE path)
+		{
+			return (DWORD)(
+				(((DWORD)(int)x&0xff)<<0) |
+				(((DWORD)(int)y&0xff)<<8) |
+				(((DWORD)(int)z&0xff)<<16) |
+				(((DWORD)(int)q&0xff)<<24));
+		}
+		// packs each component as 16 bits, x lowest; the result must stay 64 bits wide, a DWORD cast would discard z and q
+		operator UINT64() const
+		{
+			return (UINT64)(
+				(((UINT64)(int)x&0xffff)<<0) |
+				(((UINT64)(int)y&0xffff)<<16) |
+				(((UINT64)(int)z&0xffff)<<32) |
+				(((UINT64)(int)q&0xffff)<<48));
+		}
+
+		void sat() {x.sat(); y.sat(); z.sat(); q.sat();}
+		void rcp() {x.rcp(); y.rcp(); z.rcp(); q.rcp();}
+		
+		Vector floor() {return Vector(x.floor_s(), y.floor_s(), z.floor_s(), q.floor_s());}
+
+		void operator += (const Vector& v) {*this = *this + v;}
+		void operator -= (const Vector& v) {*this = *this - v;}
+		void operator *= (const Vector& v) {*this = *this * v;}
+		void operator /= (const Vector& v) {*this = *this / v;}
+
+#endif
+
+		friend Vector operator + (const Vector& v1, const Vector& v2);
+		friend Vector operator - (const Vector& v1, const Vector& v2);
+		friend Vector operator * (const Vector& v1, const Vector& v2);
+		friend Vector operator / (const Vector& v1, const Vector& v2);
+
+		friend Vector operator + (const Vector& v, Scalar s);
+		friend Vector operator - (const Vector& v, Scalar s);
+		friend Vector operator * (const Vector& v, Scalar s);
+		friend Vector operator / (const Vector& v, Scalar s);
+	};
+
+	struct {__declspec(align(16)) Vector c, p, t;}; // named view; p is the one read by operator CPoint and GetZ (c/t presumably colour and texture coordinates -- confirm)
+	struct {__declspec(align(16)) Vector sv[3];}; // the same three Vectors as an array
+	struct {__declspec(align(16)) Scalar s[12];}; // the same storage as 12 Scalars
+
+	GSVertexSWFP() {} // performs no initialisation of its own
+	GSVertexSWFP(const GSVertexSWFP& v) {*this = v;} // copy goes through operator = below
+
+	void operator = (const GSVertexSWFP& v) {c = v.c; p = v.p; t = v.t;} // member-wise Vector copies; returns void, so assignments cannot be chained
+	void operator += (const GSVertexSWFP& v) {c += v.c; p += v.p; t += v.t;}
+
+	operator CPoint() const {return CPoint((int)p.x, (int)p.y);} // p.x / p.y converted with Scalar's int conversion (a truncating cast)
+
+	__forceinline DWORD GetZ() const // currently returns p.z converted to int; the split z/q computation below is disabled
+	{
+		return (int)p.z;
+		// NOTE(review): everything below is unreachable because of the return above; it builds (int)z << 16 plus frac(z)*65536 + q
+		ASSERT((float)p.z >= 0 && (float)p.q >= 0);
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+		__m128 z = _mm_shuffle_ps(p, p, _MM_SHUFFLE(2,2,2,2));
+		__m128 q = _mm_shuffle_ps(p, p, _MM_SHUFFLE(3,3,3,3));
+		// TODO: check if our floor is faster than doing ss->si->ss
+		int zh = _mm_cvttss_si32(z);
+		__m128 zhi = _mm_cvtsi32_ss(_mm_setzero_ps(), zh); // upper lanes come from a zeroed register instead of the not-yet-initialised zhi; only lane 0 is used below
+		__m128 zhf = _mm_mul_ss(_mm_sub_ss(z, zhi), _mm_set_ss(65536));
+		int zl = _mm_cvtss_si32(_mm_add_ss(zhf, q));
+		return ((DWORD)zh << 16) + (DWORD)zl;
+#else
+		// return ((DWORD)(int)p.z << 16) + (DWORD)(int)((p.z - p.z.floor_s())*65536 + p.q);
+
+		int z = (int)p.z;
+		return ((DWORD)z << 16) + (DWORD)(((float)p.z - z)*65536 + (float)p.q);
+#endif
+	}
+
+	friend GSVertexSWFP operator + (const GSVertexSWFP& v1, const GSVertexSWFP& v2);
+	friend GSVertexSWFP operator - (const GSVertexSWFP& v1, const GSVertexSWFP& v2);
+	friend GSVertexSWFP operator * (const GSVertexSWFP& v, Scalar s);
+	friend GSVertexSWFP operator / (const GSVertexSWFP& v, Scalar s);
+
+	static void Exchange(GSVertexSWFP* RESTRICT v1, GSVertexSWFP* RESTRICT v2)
+	{
+		const GSVertexSWFP saved(*v1); // copy constructor copies c, p and t
+		*v1 = *v2; // operator = copies c, p and t
+		*v2 = saved;
+	}
+};
+
+#if _M_IX86_FP >= 2 || defined(_M_AMD64)
+// SSE2 / x64 build. Vector (op) Vector: one packed instruction per operator.
+__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_add_ps(v1, v2));}
+__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_sub_ps(v1, v2));}
+__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_mul_ps(v1, v2));}
+__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(_mm_div_ps(v1, v2));}
+// Vector (op) Scalar: the scalar is broadcast to all four lanes with _mm_set1_ps first.
+__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_add_ps(v, _mm_set1_ps(s)));}
+__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_sub_ps(v, _mm_set1_ps(s)));}
+__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_mul_ps(v, _mm_set1_ps(s)));}
+__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(_mm_div_ps(v, _mm_set1_ps(s)));}
+// "Shifts" scale every lane by 2^i; >> multiplies by the reciprocal 1.0f / (1 << i) instead of dividing.
+__forceinline GSVertexSWFP::Vector operator << (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(_mm_mul_ps(v, _mm_set1_ps((float)(1 << i))));}
+__forceinline GSVertexSWFP::Vector operator >> (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(_mm_mul_ps(v, _mm_set1_ps(1.0f / (1 << i))));}
+
+#else
+// Scalar fallback. Vector (op) Vector: applied to x, y, z and q separately through the Scalar operators.
+__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z, v1.q + v2.q);}
+__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z, v1.q - v2.q);}
+__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x * v2.x, v1.y * v2.y, v1.z * v2.z, v1.q * v2.q);}
+__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v1, const GSVertexSWFP::Vector& v2) {return GSVertexSWFP::Vector(v1.x / v2.x, v1.y / v2.y, v1.z / v2.z, v1.q / v2.q);}
+// Vector (op) Scalar: the same scalar is applied to each component.
+__forceinline GSVertexSWFP::Vector operator + (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x + s, v.y + s, v.z + s, v.q + s);}
+__forceinline GSVertexSWFP::Vector operator - (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x - s, v.y - s, v.z - s, v.q - s);}
+__forceinline GSVertexSWFP::Vector operator * (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x * s, v.y * s, v.z * s, v.q * s);}
+__forceinline GSVertexSWFP::Vector operator / (const GSVertexSWFP::Vector& v, GSVertexSWFP::Scalar s) {return GSVertexSWFP::Vector(v.x / s, v.y / s, v.z / s, v.q / s);}
+// "Shifts" forward to Scalar's << and >>, which multiply / divide by (1<<i).
+__forceinline GSVertexSWFP::Vector operator << (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(v.x << i, v.y << i, v.z << i, v.q << i);}
+__forceinline GSVertexSWFP::Vector operator >> (const GSVertexSWFP::Vector& v, int i) {return GSVertexSWFP::Vector(v.x >> i, v.y >> i, v.z >> i, v.q >> i);}
+
+#endif
+
+__forceinline GSVertexSWFP operator + (const GSVertexSWFP& v1, const GSVertexSWFP& v2)
+{
+	// Component-wise sum of c, p and t: start from a copy of v1
+	// and accumulate v2 with the member operator +=.
+	GSVertexSWFP sum(v1);
+	sum += v2;
+	return sum;
+}
+
+__forceinline GSVertexSWFP operator - (const GSVertexSWFP& v1, const GSVertexSWFP& v2)
+{
+	GSVertexSWFP diff(v1); // start from v1, then subtract v2 vector by vector
+	diff.c -= v2.c;
+	diff.p -= v2.p;
+	diff.t -= v2.t;
+	return diff;
+}
+
+__forceinline GSVertexSWFP operator * (const GSVertexSWFP& v, GSVertexSWFP::Scalar s)
+{
+	const GSVertexSWFP::Vector factor(s); // s broadcast to all four components once
+	GSVertexSWFP scaled(v);
+	scaled.c *= factor;
+	scaled.p *= factor;
+	scaled.t *= factor;
+	return scaled;
+}
+
+__forceinline GSVertexSWFP operator / (const GSVertexSWFP& v, GSVertexSWFP::Scalar s)
+{
+	const GSVertexSWFP::Vector divisor(s); // s broadcast to all four components once
+	GSVertexSWFP quotient(v);
+	quotient.c /= divisor;
+	quotient.p /= divisor;
+	quotient.t /= divisor;
+	return quotient;
+}
+
+// #include "GSVertexSWFX.h"
+
diff --git a/gsdx10/GSdx10.cpp b/gsdx10/GSdx10.cpp
new file mode 100644
index 0000000..e6ab6c4
--- /dev/null
+++ b/gsdx10/GSdx10.cpp
@@ -0,0 +1,326 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#include "stdafx.h"
+#include "GSdx10.h"
+#include "GSRendererHW.h"
+#include "GSRendererSW.h"
+#include "GSRendererNull.h"
+#include "GSSettingsDlg.h"
+
+#ifdef _DEBUG
+#define new DEBUG_NEW
+#endif
+
+//
+//	Note!
+//
+//		If this DLL is dynamically linked against the MFC
+//		DLLs, any functions exported from this DLL which
+//		call into MFC must have the AFX_MANAGE_STATE macro
+//		added at the very beginning of the function.
+//
+//		For example:
+//
+//		extern "C" BOOL PASCAL EXPORT ExportedFunction()
+//		{
+//			AFX_MANAGE_STATE(AfxGetStaticModuleState());
+//			// normal function body here
+//		}
+//
+//		It is very important that this macro appear in each
+//		function, prior to any calls into MFC.  This means that
+//		it must appear as the first statement within the 
+//		function, even before any object variable declarations
+//		as their constructors may generate calls into the MFC
+//		DLL.
+//
+//		Please see MFC Technical Notes 33 and 58 for additional
+//		details.
+//
+
+BEGIN_MESSAGE_MAP(GSdx10App, CWinApp)
+END_MESSAGE_MAP()
+
+GSdx10App::GSdx10App() // empty: the only setup this class does happens in InitInstance
+{
+}
+
+GSdx10App theApp;
+
+BOOL GSdx10App::InitInstance() // always returns TRUE; the base-class result is not checked
+{
+	__super::InitInstance();
+
+	SetRegistryKey(_T("Gabest")); // presumably makes the GetProfileInt calls in GSopen read from the registry under this key -- confirm against MFC docs
+
+	return TRUE;
+}
+
+//
+
+#define PS2E_LT_GS 0x01
+#define PS2E_GS_VERSION 0x0006
+#define PS2E_X86 0x01   // 32 bit
+#define PS2E_X86_64 0x02   // 64 bit
+
+EXPORT_C_(UINT32) PS2EgetLibType() // reports the plugin type constant PS2E_LT_GS (0x01)
+{
+	return PS2E_LT_GS;
+}
+
+EXPORT_C_(char*) PS2EgetLibName() // returns a static buffer holding "GSdx10", an optional " 64-bit", and an optional " (tag, tag)" list
+{
+	CString str = _T("GSdx10");
+
+#if _M_AMD64
+	str += _T(" 64-bit");
+#endif
+
+	CAtlList<CString> sl; // build tags, joined below as " (a, b)"
+
+#ifdef __INTEL_COMPILER
+	CString s;
+	s.Format(_T("Intel C++ %d.%02d"), __INTEL_COMPILER/100, __INTEL_COMPILER%100);
+	sl.AddTail(s);
+#elif _MSC_VER
+	CString s;
+	s.Format(_T("MSVC %d.%02d"), _MSC_VER/100, _MSC_VER%100);
+	sl.AddTail(s);
+#endif
+
+#if _M_IX86_FP >= 2
+	sl.AddTail(_T("SSE2"));
+#elif _M_IX86_FP >= 1
+	sl.AddTail(_T("SSE"));
+#endif
+
+	POSITION pos = sl.GetHeadPosition();
+
+	while(pos)
+	{
+		if(pos == sl.GetHeadPosition()) str += _T(" ("); // opening bracket before the first tag only
+		str += sl.GetNext(pos);
+		str += pos ? _T(", ") : _T(")"); // separator between tags, closing bracket after the last
+	}
+	static char buff[256]; // static: the returned pointer has to outlive this call
+	strncpy(buff, CStringA(str), countof(buff)-1); // zero-pads when the text is shorter than the limit
+	buff[countof(buff)-1] = 0; // explicit terminator for the case where the text fills the buffer
+	return buff;
+}
+
+EXPORT_C_(UINT32) PS2EgetLibVersion2(UINT32 type)
+{
+	// Packed result: build at bit 0, revision at bit 8, PS2E_GS_VERSION at bit 16, minor at bit 24.
+	// The type argument is not used.
+	const UINT32 minor = 0, revision = 0, build = 1;
+	const UINT32 low = (build << 0) | (revision << 8);
+	return low | (PS2E_GS_VERSION << 16) | (minor << 24);
+}
+
+EXPORT_C_(UINT32) PS2EgetCpuPlatform() // compile-time choice: PS2E_X86_64 (0x02) when _M_AMD64 is set, otherwise PS2E_X86 (0x01)
+{
+#if _M_AMD64
+	return PS2E_X86_64;
+#else
+	return PS2E_X86;
+#endif
+}
+
+//////////////////
+
+static HRESULT s_hr = E_FAIL;
+static GSRenderer* s_gs;
+static void (*s_irq)() = NULL;
+static BYTE* s_basemem = NULL;
+
+EXPORT_C GSsetBaseMem(BYTE* mem) // stores a biased pointer that GSopen later hands to the renderer constructors
+{
+	s_basemem = mem - 0x12000000; // so that s_basemem + 0x12000000 == mem; presumably 0x12000000 is the guest address mem corresponds to -- confirm
+}
+
+EXPORT_C_(INT32) GSinit() // does no initialisation beyond setting the MFC module state; always returns 0
+{
+	AFX_MANAGE_STATE(AfxGetStaticModuleState());
+
+	return 0;
+}
+
+EXPORT_C GSshutdown() // no teardown here; the renderer and COM are released in GSclose
+{
+	AFX_MANAGE_STATE(AfxGetStaticModuleState());
+}
+
+EXPORT_C GSclose()
+{
+	delete s_gs; // deleting NULL is a no-op, so this is safe when no renderer exists
+	s_gs = NULL;
+
+	// s_hr holds the CoInitialize result from GSopen (E_FAIL when there is nothing to undo)
+	if(FAILED(s_hr)) return;
+
+	// balance the successful CoInitialize, then mark COM as not initialised by us
+	::CoUninitialize();
+
+	s_hr = E_FAIL;
+}
+
+EXPORT_C_(INT32) GSopen(void* dsp, char* title, int mt) // creates the renderer and its window; returns 0 on success, -1 on failure
+{
+	AFX_MANAGE_STATE(AfxGetStaticModuleState());
+
+	GSclose(); // drop any renderer / COM state left from a previous open
+
+	bool nloophack = AfxGetApp()->GetProfileInt(_T("Settings"), _T("nloophack"), 2) == 1; // default is 2, so only a stored value of 1 yields true
+
+	switch(AfxGetApp()->GetProfileInt(_T("Settings"), _T("renderer"), 0)) // default 0 selects the hardware renderer
+	{
+	case 0: s_gs = new GSRendererHW(s_basemem, !!mt, s_irq, nloophack); break;
+	case 1: s_gs = new GSRendererSWFP(s_basemem, !!mt, s_irq, nloophack); break;
+	case 2: s_gs = new GSRendererNull(s_basemem, !!mt, s_irq, nloophack); break;
+	default: return -1; // unknown renderer id: nothing was created
+	}
+
+	s_hr = ::CoInitialize(0); // result is kept so GSclose can balance it with CoUninitialize
+
+	if(!s_gs->Create(CString(title)))
+	{
+		GSclose(); // deletes the renderer again and undoes CoInitialize
+		return -1;
+	}
+
+	s_gs->Show();
+
+	*(HWND*)dsp = *s_gs; // dsp is treated as HWND*; the renderer object is converted to HWND for the caller
+
+	return 0;
+}
+
+EXPORT_C GSreset() // forwards to the renderer; s_gs is not checked for NULL
+{
+	s_gs->Reset();
+}
+
+EXPORT_C GSwriteCSR(UINT32 csr) // forwards csr to the renderer; s_gs is not checked for NULL
+{
+	s_gs->WriteCSR(csr);
+}
+
+EXPORT_C GSreadFIFO(BYTE* mem) // same as GSreadFIFO2 with a size of 1; s_gs is not checked for NULL
+{
+	s_gs->ReadFIFO(mem, 1);
+}
+
+EXPORT_C GSreadFIFO2(BYTE* mem, UINT32 size) // forwards to ReadFIFO; size is passed through unchanged (unit not visible here)
+{
+	s_gs->ReadFIFO(mem, size);
+}
+
+EXPORT_C GSgifTransfer1(BYTE* mem, UINT32 addr) // transfer with index 0, starting at mem + addr
+{
+	s_gs->Transfer(mem + addr, (0x4000 - addr) / 16, 0); // size = number of 16-byte units from addr up to offset 0x4000; assumes addr <= 0x4000 (unsigned subtraction)
+}
+
+EXPORT_C GSgifTransfer2(BYTE* mem, UINT32 size) // forwards to Transfer with index 1
+{
+	s_gs->Transfer(mem, size, 1);
+}
+
+EXPORT_C GSgifTransfer3(BYTE* mem, UINT32 size) // forwards to Transfer with index 2
+{
+	s_gs->Transfer(mem, size, 2);
+}
+
+EXPORT_C GSvsync(int field) // forwards field to the renderer; s_gs is not checked for NULL
+{
+	s_gs->VSync(field);
+}
+
+EXPORT_C_(UINT32) GSmakeSnapshot(char* path) // returns whatever the renderer's MakeSnapshot(path) returns
+{
+	return s_gs->MakeSnapshot(path);
+}
+
+EXPORT_C GSkeyEvent(keyEvent* ev) // empty body: ev is ignored
+{
+}
+
+EXPORT_C_(INT32) GSfreeze(int mode, freezeData* data)
+{
+	// Save-state entry point: dispatch on mode. s_gs is used unchecked,
+	// so a renderer must already exist.
+
+	// FREEZE_SAVE: Freeze with its second argument false
+	if(mode == FREEZE_SAVE) return s_gs->Freeze(data, false);
+
+	// FREEZE_SIZE: the same call with the second argument true
+	if(mode == FREEZE_SIZE) return s_gs->Freeze(data, true);
+
+	// FREEZE_LOAD: restore from data
+	if(mode == FREEZE_LOAD) return s_gs->Defrost(data);
+
+	// any other mode is ignored and reported as 0
+	return 0;
+}
+
+EXPORT_C GSconfigure() // shows the settings dialog modally
+{
+	AFX_MANAGE_STATE(AfxGetStaticModuleState());
+
+	if(IDOK == GSSettingsDlg().DoModal())
+	{
+		GSshutdown(); // in this file both calls only set the MFC module state
+		GSinit();
+	}
+}
+
+EXPORT_C_(INT32) GStest() // returns 0 when a hardware D3D10 device can be created, -1 otherwise
+{
+	AFX_MANAGE_STATE(AfxGetStaticModuleState());
+
+	CComPtr<ID3D10Device> dev; // smart pointer: the probe device is released when dev goes out of scope
+
+	return SUCCEEDED(D3D10CreateDevice(NULL, D3D10_DRIVER_TYPE_HARDWARE, NULL, 0, D3D10_SDK_VERSION, &dev)) ? 0 : -1;
+}
+
+EXPORT_C GSabout() // empty body: no about box is shown
+{
+}
+
+EXPORT_C GSirqCallback(void (*irq)()) // only stores the callback; GSopen passes s_irq to the renderer it constructs
+{
+	s_irq = irq;
+}
+
+EXPORT_C GSsetGameCRC(int crc, int options) // forwards both values to the renderer; s_gs is not checked for NULL
+{
+	s_gs->SetGameCRC(crc, options);
+}
+
+EXPORT_C GSgetLastTag(UINT32* tag) // forwards tag to the renderer's GetLastTag; s_gs is not checked for NULL
+{
+	s_gs->GetLastTag(tag);
+}
+
+EXPORT_C GSsetFrameSkip(int frameskip) // forwards frameskip to the renderer; s_gs is not checked for NULL
+{
+	s_gs->SetFrameSkip(frameskip);
+}
diff --git a/gsdx10/GSdx10.def b/gsdx10/GSdx10.def
new file mode 100644
index 0000000..693232b
--- /dev/null
+++ b/gsdx10/GSdx10.def
@@ -0,0 +1,35 @@
+; GSdx10.def : Declares the module parameters for the DLL.
+
+LIBRARY      "GSdx10"
+
+EXPORTS
+    ; Explicit exports can go here
+	PS2EgetLibType		
+	PS2EgetLibName		
+	PS2EgetLibVersion2	
+	PS2EgetCpuPlatform
+	GSsetBaseMem
+	GSinit				
+	GSshutdown			
+	GSopen				
+	GSclose		
+	GSreset
+	GSwriteCSR		
+	GSgifTransfer1		
+	GSgifTransfer2		
+	GSgifTransfer3		
+	GSvsync				
+	GSmakeSnapshot		
+	GSkeyEvent			
+	GSfreeze            
+	GSconfigure			
+	GStest				
+	GSabout				
+	GSreadFIFO
+	GSreadFIFO2
+	GSirqCallback
+	GSsetGameCRC
+	; GSgetLastTag is defined with EXPORT_C in GSdx10.cpp like the entries above, so it is listed here too
+	GSgetLastTag
+	GSsetFrameSkip
+	; GSReplay
\ No newline at end of file
diff --git a/gsdx10/GSdx10.h b/gsdx10/GSdx10.h
new file mode 100644
index 0000000..8424900
--- /dev/null
+++ b/gsdx10/GSdx10.h
@@ -0,0 +1,37 @@
+/* 
+ *	Copyright (C) 2007 Gabest
+ *	http://www.gabest.org
+ *
+ *  This Program is free software; you can redistribute it and/or modify
+ *  it under the terms of the GNU General Public License as published by
+ *  the Free Software Foundation; either version 2, or (at your option)
+ *  any later version.
+ *   
+ *  This Program is distributed in the hope that it will be useful,
+ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ *  GNU General Public License for more details.
+ *   
+ *  You should have received a copy of the GNU General Public License
+ *  along with GNU Make; see the file COPYING.  If not, write to
+ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. 
+ *  http://www.gnu.org/copyleft/gpl.html
+ *
+ */
+
+#pragma once
+
+#ifndef __AFXWIN_H__
+	#error include 'stdafx.h' before including this file for PCH
+#endif
+
+class GSdx10App : public CWinApp // MFC application object of the plugin DLL; one global instance (theApp) is defined in GSdx10.cpp
+{
+public:
+	GSdx10App(); // empty in GSdx10.cpp
+
+public:
+	virtual BOOL InitInstance(); // calls the base class, sets the registry key "Gabest", returns TRUE
+
+	DECLARE_MESSAGE_MAP() // the matching BEGIN_MESSAGE_MAP/END_MESSAGE_MAP in GSdx10.cpp has no entries
+};
diff --git a/gsdx10/GSdx10.rc b/gsdx10/GSdx10.rc
new file mode 100644
index 0000000..e1b800a
--- /dev/null
+++ b/gsdx10/GSdx10.rc
@@ -0,0 +1,197 @@
+// Microsoft Visual C++ generated resource script.
+//
+#include "resource.h"
+
+#define APSTUDIO_READONLY_SYMBOLS
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 2 resource.
+//
+#include "afxres.h"
+
+/////////////////////////////////////////////////////////////////////////////
+#undef APSTUDIO_READONLY_SYMBOLS
+
+/////////////////////////////////////////////////////////////////////////////
+// Hungarian resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_HUN)
+#ifdef _WIN32
+LANGUAGE LANG_HUNGARIAN, SUBLANG_DEFAULT
+#pragma code_page(1250)
+#endif //_WIN32
+
+#ifdef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// TEXTINCLUDE
+//
+
+1 TEXTINCLUDE 
+BEGIN
+    "resource.h\0"
+END
+
+2 TEXTINCLUDE 
+BEGIN
+    "#include ""afxres.h""\r\n"
+    "\0"
+END
+
+3 TEXTINCLUDE 
+BEGIN
+    "#define _AFX_NO_SPLITTER_RESOURCES\r\n"
+    "#define _AFX_NO_OLE_RESOURCES\r\n"
+    "#define _AFX_NO_TRACKER_RESOURCES\r\n"
+    "#define _AFX_NO_PROPERTY_RESOURCES\r\n"
+    "\r\n"
+    "#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)\r\n"
+    "LANGUAGE 9, 1\r\n"
+    "#pragma code_page(1252)\r\n"
+    "#include ""res\\GSdx10.rc2""  // non-Microsoft Visual C++ edited resources\r\n"
+    "#include ""afxres.rc""     // Standard components\r\n"
+    "#endif\r\n"
+    "\0"
+END
+
+#endif    // APSTUDIO_INVOKED
+
+#endif    // Hungarian resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+/////////////////////////////////////////////////////////////////////////////
+// English (U.S.) resources
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+#ifdef _WIN32
+LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US
+#pragma code_page(1252)
+#endif //_WIN32
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Bitmap
+//
+
+IDB_LOGO1               BITMAP                  "res\\logo1.bmp"
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Dialog
+//
+
+IDD_CONFIG DIALOGEX 0, 0, 189, 204
+STYLE DS_SETFONT | DS_MODALFRAME | DS_FIXEDSYS | WS_POPUP | WS_CAPTION | WS_SYSMENU
+CAPTION "Settings..."
+FONT 8, "MS Shell Dlg", 400, 0, 0x1
+BEGIN
+    CONTROL         2023,IDC_STATIC,"Static",SS_BITMAP,7,7,175,44,WS_EX_CLIENTEDGE
+    LTEXT           "Resolution:",IDC_STATIC,7,59,37,8
+    COMBOBOX        IDC_COMBO3,71,57,111,125,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
+    LTEXT           "Renderer:",IDC_STATIC,7,74,34,8
+    COMBOBOX        IDC_COMBO1,71,72,111,69,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
+    LTEXT           "Interlacing (F5):",IDC_STATIC,7,90,53,8
+    COMBOBOX        IDC_COMBO2,71,87,111,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
+    LTEXT           "Aspect Ratio (F6):",IDC_STATIC,7,105,60,8
+    COMBOBOX        IDC_COMBO5,71,102,111,98,CBS_DROPDOWNLIST | WS_VSCROLL | WS_TABSTOP
+    LTEXT           "D3D internal res:",IDC_STATIC,7,120,55,8
+    EDITTEXT        IDC_EDIT1,71,117,35,13,ES_AUTOHSCROLL | ES_NUMBER
+    CONTROL         "",IDC_SPIN1,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,99,132,11,14
+    EDITTEXT        IDC_EDIT2,109,117,35,13,ES_AUTOHSCROLL | ES_NUMBER
+    CONTROL         "",IDC_SPIN2,"msctls_updown32",UDS_SETBUDDYINT | UDS_ALIGNRIGHT | UDS_AUTOBUDDY | UDS_ARROWKEYS | UDS_NOTHOUSANDS,134,132,11,14
+    CONTROL         "Native",IDC_CHECK1,"Button",BS_AUTOCHECKBOX | WS_TABSTOP,149,119,33,10
+    CONTROL         "Texture filtering",IDC_CHECK4,"Button",BS_AUTO3STATE | WS_TABSTOP,7,139,67,10
+    CONTROL         "Enable NLOOP hack (third state => auto)",IDC_CHECK6,
+                    "Button",BS_AUTO3STATE | WS_TABSTOP,7,151,149,10
+    DEFPUSHBUTTON   "OK",IDOK,42,183,50,14
+    PUSHBUTTON      "Cancel",IDCANCEL,96,183,50,14
+    CONTROL         "Wait vsync",IDC_CHECK2,"Button",BS_AUTOCHECKBOX | WS_DISABLED | WS_TABSTOP,7,163,51,10
+END
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// DESIGNINFO
+//
+
+#ifdef APSTUDIO_INVOKED
+GUIDELINES DESIGNINFO 
+BEGIN
+    IDD_CONFIG, DIALOG
+    BEGIN
+        LEFTMARGIN, 7
+        RIGHTMARGIN, 182
+        VERTGUIDE, 71
+        VERTGUIDE, 182
+        TOPMARGIN, 7
+        BOTTOMMARGIN, 197
+    END
+END
+#endif    // APSTUDIO_INVOKED
+
+
+/////////////////////////////////////////////////////////////////////////////
+//
+// Version
+//
+
+VS_VERSION_INFO VERSIONINFO
+ FILEVERSION 1,0,0,0
+ PRODUCTVERSION 1,0,0,0
+ FILEFLAGSMASK 0x3fL
+#ifdef _DEBUG
+ FILEFLAGS 0x1L
+#else
+ FILEFLAGS 0x0L
+#endif
+ FILEOS 0x4L
+ FILETYPE 0x2L
+ FILESUBTYPE 0x0L
+BEGIN
+    BLOCK "StringFileInfo"
+    BEGIN
+        BLOCK "040904e4"
+        BEGIN
+            VALUE "Comments", "http://gabest.org/"
+            VALUE "CompanyName", "Gabest"
+            VALUE "FileDescription", "GS plugin for ps2 emulators"
+            VALUE "FileVersion", "1, 0, 0, 0"
+            VALUE "InternalName", "GSdx10.dll"
+            VALUE "LegalCopyright", "Copyright (c) 2007 Gabest.  All rights reserved."
+            VALUE "OriginalFilename", "GSdx10.dll"
+            VALUE "ProductName", "GSdx10"
+            VALUE "ProductVersion", "1, 0, 0, 0"
+        END
+    END
+    BLOCK "VarFileInfo"
+    BEGIN
+        VALUE "Translation", 0x409, 1252
+    END
+END
+
+#endif    // English (U.S.) resources
+/////////////////////////////////////////////////////////////////////////////
+
+
+
+#ifndef APSTUDIO_INVOKED
+/////////////////////////////////////////////////////////////////////////////
+//
+// Generated from the TEXTINCLUDE 3 resource.
+//
+#define _AFX_NO_SPLITTER_RESOURCES
+#define _AFX_NO_OLE_RESOURCES
+#define _AFX_NO_TRACKER_RESOURCES
+#define _AFX_NO_PROPERTY_RESOURCES
+
+#if !defined(AFX_RESOURCE_DLL) || defined(AFX_TARG_ENU)
+LANGUAGE 9, 1
+#pragma code_page(1252)
+#include "res\GSdx10.rc2"  // non-Microsoft Visual C++ edited resources
+#include "afxres.rc"     // Standard components
+#endif
+
+/////////////////////////////////////////////////////////////////////////////
+#endif    // not APSTUDIO_INVOKED
+
diff --git a/gsdx10/GSdx10_vs2005.vcproj b/gsdx10/GSdx10_vs2005.vcproj
new file mode 100644
index 0000000..7517957
--- /dev/null
+++ b/gsdx10/GSdx10_vs2005.vcproj
@@ -0,0 +1,835 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="GSdx10"
+	ProjectGUID="{345C9F24-0B9A-4289-B375-ADD3B63461B7}"
+	RootNamespace="GSdx10"
+	Keyword="MFCDLLProj"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\debug.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="false"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\debug.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="false"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				StructMemberAlignment="5"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=""
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\release.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				RuntimeLibrary="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="false"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\release.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="false"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				RuntimeLibrary="0"
+				StructMemberAlignment="5"
+				ForceConformanceInForLoopScope="true"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=""
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\debug.vsprops;..\common.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="false"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\release.vsprops;..\common.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				RuntimeLibrary="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="true"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\GSDepthStencil.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDevice.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSdx10.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSdx10.def"
+				>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\GSRenderer.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererHW.cpp"
+				>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						AssemblerOutput="4"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\GSRendererNull.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererSW.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRenderTarget.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSSettingsDlg.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTexture.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTexture2D.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureCache.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureFX.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.cpp"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath=".\GSDevice.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSdx10.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRenderer.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererHW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererNull.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererSW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSSettingsDlg.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTexture2D.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureCache.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureFX.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSUtil.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexHW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexSW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Resource.h"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+			<File
+				RelativePath=".\GSdx10.rc"
+				>
+			</File>
+			<File
+				RelativePath=".\res\GSdx10.rc2"
+				>
+			</File>
+			<File
+				RelativePath=".\res\logo1.bmp"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Shaders"
+			>
+			<File
+				RelativePath=".\res\convert.fx"
+				>
+			</File>
+			<File
+				RelativePath=".\res\interlace.fx"
+				>
+			</File>
+			<File
+				RelativePath=".\res\merge.fx"
+				>
+			</File>
+			<File
+				RelativePath=".\res\tfx.fx"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+		<Global
+			Name="RESOURCE_FILE"
+			Value="GSdx10.rc"
+		/>
+	</Globals>
+</VisualStudioProject>
diff --git a/gsdx10/GSdx10_vs2008.vcproj b/gsdx10/GSdx10_vs2008.vcproj
new file mode 100644
index 0000000..54eda1b
--- /dev/null
+++ b/gsdx10/GSdx10_vs2008.vcproj
@@ -0,0 +1,835 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioProject
+	ProjectType="Visual C++"
+	Version="9.00"
+	Name="GSdx10"
+	ProjectGUID="{345C9F24-0B9A-4289-B375-ADD3B63461B7}"
+	RootNamespace="GSdx10"
+	Keyword="MFCDLLProj"
+	TargetFrameworkVersion="131072"
+	>
+	<Platforms>
+		<Platform
+			Name="Win32"
+		/>
+		<Platform
+			Name="x64"
+		/>
+	</Platforms>
+	<ToolFiles>
+	</ToolFiles>
+	<Configurations>
+		<Configuration
+			Name="Debug|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\debug.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="false"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug|x64"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\debug.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="false"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				StructMemberAlignment="5"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=""
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\release.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				RuntimeLibrary="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="false"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release|x64"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\release.vsprops;..\common.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="false"
+				TargetEnvironment="3"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				RuntimeLibrary="0"
+				StructMemberAlignment="5"
+				ForceConformanceInForLoopScope="true"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=""
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="17"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\debug.vsprops;..\common.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="_DEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="_DEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="false"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Debug SSE2|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|Win32"
+			ConfigurationType="2"
+			InheritedPropertySheets="..\release.vsprops;..\common.vsprops;..\sse2.vsprops"
+			UseOfMFC="1"
+			CharacterSet="1"
+			WholeProgramOptimization="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+				PreprocessorDefinitions="NDEBUG"
+				MkTypLibCompatible="false"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+				RuntimeLibrary="0"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+				PreprocessorDefinitions="NDEBUG"
+				Culture="1033"
+				AdditionalIncludeDirectories="$(IntDir)"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+				AdditionalDependencies="d3d10.lib d3dx10.lib dxguid.lib winmm.lib"
+				OutputFile="$(OutDir)\GSdx10.dll"
+				ModuleDefinitionFile=".\GSdx10.def"
+				DataExecutionPrevention="0"
+				ImportLibrary="$(OutDir)/GSdx10.lib"
+				TargetMachine="1"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+				EmbedManifest="true"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+		<Configuration
+			Name="Release SSE2|x64"
+			OutputDirectory="$(SolutionDir)$(PlatformName)\$(ConfigurationName)"
+			IntermediateDirectory="$(PlatformName)\$(ConfigurationName)"
+			ConfigurationType="1"
+			UseOfMFC="1"
+			CharacterSet="1"
+			>
+			<Tool
+				Name="VCPreBuildEventTool"
+			/>
+			<Tool
+				Name="VCCustomBuildTool"
+			/>
+			<Tool
+				Name="VCXMLDataGeneratorTool"
+			/>
+			<Tool
+				Name="VCWebServiceProxyGeneratorTool"
+			/>
+			<Tool
+				Name="VCMIDLTool"
+			/>
+			<Tool
+				Name="VCCLCompilerTool"
+				PreprocessorDefinitions="_USRDLL"
+			/>
+			<Tool
+				Name="VCManagedResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCResourceCompilerTool"
+			/>
+			<Tool
+				Name="VCPreLinkEventTool"
+			/>
+			<Tool
+				Name="VCLinkerTool"
+			/>
+			<Tool
+				Name="VCALinkTool"
+			/>
+			<Tool
+				Name="VCManifestTool"
+			/>
+			<Tool
+				Name="VCXDCMakeTool"
+			/>
+			<Tool
+				Name="VCBscMakeTool"
+			/>
+			<Tool
+				Name="VCFxCopTool"
+			/>
+			<Tool
+				Name="VCAppVerifierTool"
+			/>
+			<Tool
+				Name="VCPostBuildEventTool"
+			/>
+		</Configuration>
+	</Configurations>
+	<References>
+	</References>
+	<Files>
+		<Filter
+			Name="Source Files"
+			Filter="cpp;c;cxx;def;odl;idl;hpj;bat;asm;asmx"
+			UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
+			>
+			<File
+				RelativePath=".\GSDepthStencil.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSDevice.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSdx10.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSdx10.def"
+				>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCustomBuildTool"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\GSRenderer.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererHW.cpp"
+				>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						AssemblerOutput="4"
+					/>
+				</FileConfiguration>
+			</File>
+			<File
+				RelativePath=".\GSRendererNull.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererSW.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRenderTarget.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSSettingsDlg.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTexture.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTexture2D.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureCache.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureFX.cpp"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.cpp"
+				>
+				<FileConfiguration
+					Name="Debug|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release|x64"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Debug SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+				<FileConfiguration
+					Name="Release SSE2|Win32"
+					>
+					<Tool
+						Name="VCCLCompilerTool"
+						UsePrecompiledHeader="1"
+					/>
+				</FileConfiguration>
+			</File>
+		</Filter>
+		<Filter
+			Name="Header Files"
+			Filter="h;hpp;hxx;hm;inl;inc;xsd"
+			UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
+			>
+			<File
+				RelativePath=".\GSDevice.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSdx10.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRenderer.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererHW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererNull.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSRendererSW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSSettingsDlg.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTexture2D.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureCache.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSTextureFX.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSUtil.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexHW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\GSVertexSW.h"
+				>
+			</File>
+			<File
+				RelativePath=".\Resource.h"
+				>
+			</File>
+			<File
+				RelativePath=".\stdafx.h"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Resource Files"
+			Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx"
+			UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
+			>
+			<File
+				RelativePath=".\GSdx10.rc"
+				>
+			</File>
+			<File
+				RelativePath=".\res\GSdx10.rc2"
+				>
+			</File>
+			<File
+				RelativePath=".\res\logo1.bmp"
+				>
+			</File>
+		</Filter>
+		<Filter
+			Name="Shaders"
+			>
+			<File
+				RelativePath=".\res\convert.fx"
+				>
+			</File>
+			<File
+				RelativePath=".\res\interlace.fx"
+				>
+			</File>
+			<File
+				RelativePath=".\res\merge.fx"
+				>
+			</File>
+			<File
+				RelativePath=".\res\tfx.fx"
+				>
+			</File>
+		</Filter>
+	</Files>
+	<Globals>
+		<Global
+			Name="RESOURCE_FILE"
+			Value="GSdx10.rc"
+		/>
+	</Globals>
+</VisualStudioProject>
diff --git a/gsdx10/Resource.h b/gsdx10/Resource.h
new file mode 100644
index 0000000..8a4247f
--- /dev/null
+++ b/gsdx10/Resource.h
@@ -0,0 +1,39 @@
+//{{NO_DEPENDENCIES}}
+// Microsoft Visual C++ generated include file.
+// Used by GSdx10.rc
+//
+#define IDD_CONFIG                      2001
+#define IDC_CHECK1                      2002
+#define IDC_CHECK2                      2003
+#define IDC_CHECK3                      2004
+#define IDC_CHECK5                      2005
+#define IDC_CHECK6                      2006
+#define IDC_COMBO1                      2007
+#define IDC_COMBO3                      2008
+#define IDC_COMBO4                      2009
+#define IDC_EDIT1                       2010
+#define IDC_EDIT2                       2011
+#define IDC_CUSTOM1                     2014
+#define IDC_CHECK4                      2015
+#define IDC_COMBO2                      2016
+#define IDC_COMBO5                      2017
+#define IDC_RADIO1                      2018
+#define IDC_SPIN1                       2020
+#define IDC_SPIN2                       2021
+#define IDB_BITMAP1                     2022
+#define IDB_LOGO1                       2023
+#define IDR_CONVERT_FX                  10000
+#define IDR_TFX_FX                      10001
+#define IDR_MERGE_FX                    10002
+#define IDR_INTERLACE_FX                10003
+
+// Next default values for new objects
+// 
+#ifdef APSTUDIO_INVOKED
+#ifndef APSTUDIO_READONLY_SYMBOLS
+#define _APS_NEXT_RESOURCE_VALUE        10004
+#define _APS_NEXT_COMMAND_VALUE         32771
+#define _APS_NEXT_CONTROL_VALUE         2024
+#define _APS_NEXT_SYMED_VALUE           5000
+#endif
+#endif
diff --git a/gsdx10/res/GSdx10.rc2 b/gsdx10/res/GSdx10.rc2
new file mode 100644
index 0000000..0c79a14
--- /dev/null
+++ b/gsdx10/res/GSdx10.rc2
@@ -0,0 +1,18 @@
+//
+// GSdx10.RC2 - resources Microsoft Visual C++ does not edit directly
+//
+
+#ifdef APSTUDIO_INVOKED
+#error this file is not editable by Microsoft Visual C++
+#endif //APSTUDIO_INVOKED
+
+
+/////////////////////////////////////////////////////////////////////////////
+// Add manually edited resources here...
+
+IDR_CONVERT_FX		RCDATA "res\\convert.fx"
+IDR_TFX_FX			RCDATA "res\\tfx.fx"
+IDR_MERGE_FX		RCDATA "res\\merge.fx"
+IDR_INTERLACE_FX	RCDATA "res\\interlace.fx"
+
+/////////////////////////////////////////////////////////////////////////////
diff --git a/gsdx10/res/convert.fx b/gsdx10/res/convert.fx
new file mode 100644
index 0000000..8c41a33
--- /dev/null
+++ b/gsdx10/res/convert.fx
@@ -0,0 +1,60 @@
+struct VS_INPUT // vertex data read by vs_main
+{
+    float4 p : POSITION; // position, forwarded unchanged by vs_main
+    float2 t : TEXCOORD0; // texture coordinate, forwarded unchanged by vs_main
+};
+
+struct VS_OUTPUT // value returned by vs_main; same fields as PS_INPUT
+{
+	float4 p : SV_Position; // copy of VS_INPUT.p
+	float2 t : TEXCOORD0; // copy of VS_INPUT.t
+};
+
+// Pass-through vertex shader: position and texture coordinate reach the pixel shader unmodified.
+VS_OUTPUT vs_main(VS_INPUT input)
+{
+	VS_OUTPUT result;
+	result.t = input.t;
+	result.p = input.p;
+
+	return result;
+}
+
+Texture2D Texture; // source image sampled by every pixel shader in this file
+SamplerState Sampler; // sampler state passed to every Texture.Sample call in this file
+
+struct PS_INPUT // interpolated vs_main output as seen by the pixel shaders
+{
+	float4 p : SV_Position; // pixel position; not read by the pixel shaders in this file
+	float2 t : TEXCOORD0; // coordinate used to sample Texture
+};
+
+float4 ps_main0(PS_INPUT input) : SV_Target0 // plain copy of the source texture
+{
+	return Texture.Sample(Sampler, input.t); // sampled texel is returned as-is
+}
+
+// Packs the sampled RGBA texel into one integer: R in bits 0-4, G in bits 5-9, B in bits 10-14, A in bit 15.
+uint ps_main1(PS_INPUT input) : SV_Target0
+{
+	float4 f = Texture.Sample(Sampler, input.t);
+	f.a *= 256.0f/127; // hm, 0.5 won't give us 1.0 if we just multiply with 2
+	// Scale every channel by the mask of its bit field; the float-to-uint conversion drops the fraction.
+	uint4 i = f * float4(0x001f, 0x03e0, 0x7c00, 0x8000);
+	// Masking removes the bits that fall outside each field before the four fields are OR-ed together.
+	return (i.x & 0x001f) | (i.y & 0x03e0) | (i.z & 0x7c00) | (i.w & 0x8000);	
+}
+
+// Discards pixels whose sampled alpha is below 0.5 - 0.9/256; every surviving pixel outputs 0.
+float4 ps_main2(PS_INPUT input) : SV_Target0
+{
+	clip(Texture.Sample(Sampler, input.t).a - (0.5 - 0.9f/256));
+	return 0;
+}
+
+// Inverse of ps_main2: discards pixels whose sampled alpha is above 0.5 - 0.9/256; survivors output 0.
+float4 ps_main3(PS_INPUT input) : SV_Target0
+{
+	clip((0.5 - 0.9f/256) -  Texture.Sample(Sampler, input.t).a);
+	return 0;
+}
diff --git a/gsdx10/res/interlace.fx b/gsdx10/res/interlace.fx
new file mode 100644
index 0000000..9eed8ed
--- /dev/null
+++ b/gsdx10/res/interlace.fx
@@ -0,0 +1,43 @@
+
+Texture2D Texture; // source frame sampled by all four pixel shaders below
+SamplerState Sampler; // sampler state passed to every Texture.Sample call in this file
+
+cbuffer cb0 // constants read by the pixel shaders in this file
+{
+	float2 ZrH; // coordinate offset to the previous/next tap in ps_main2; presumably (0, 1/height) - TODO confirm
+	float hH; // scales t.y before frac() in ps_main0/ps_main1; presumably height/2 - TODO confirm
+};
+
+struct PS_INPUT // pixel shader input; no vertex shader is defined in this file
+{
+	float4 p : SV_Position; // pixel position; not read by the shaders below
+	float2 t : TEXCOORD0; // coordinate used to sample Texture and to pick lines
+};
+
+// Copies the texel only where frac(t.y * hH) >= 0.5; every other pixel is discarded.
+float4 ps_main0(PS_INPUT input) : SV_Target0
+{
+	clip(frac(input.t.y * hH) - 0.5);
+	return Texture.Sample(Sampler, input.t);
+}
+
+// Complement of ps_main0: copies the texel only where frac(t.y * hH) <= 0.5; every other pixel is discarded.
+float4 ps_main1(PS_INPUT input) : SV_Target0
+{
+	clip(0.5 - frac(input.t.y * hH));
+	return Texture.Sample(Sampler, input.t);
+}
+
+// Three-tap blur along ZrH: the taps at t - ZrH, t and t + ZrH are blended with weights 1:2:1.
+float4 ps_main2(PS_INPUT input) : SV_Target0
+{
+	float4 c0 = Texture.Sample(Sampler, input.t - ZrH);
+	float4 c1 = Texture.Sample(Sampler, input.t);
+	float4 c2 = Texture.Sample(Sampler, input.t + ZrH);
+	return (c0 + c1 * 2 + c2) / 4;
+}
+
+float4 ps_main3(PS_INPUT input) : SV_Target0 // plain copy, no line selection or blending
+{
+	return Texture.Sample(Sampler, input.t); // sampled texel is returned as-is
+}
diff --git a/gsdx10/res/logo1.bmp b/gsdx10/res/logo1.bmp
new file mode 100644
index 0000000..25f6b17
Binary files /dev/null and b/gsdx10/res/logo1.bmp differ
diff --git a/gsdx10/res/merge.fx b/gsdx10/res/merge.fx
new file mode 100644
index 0000000..aa72c3c
--- /dev/null
+++ b/gsdx10/res/merge.fx
@@ -0,0 +1,53 @@
+struct VS_INPUT // vertex data read by vs_main
+{
+    float4 p : POSITION; // position, forwarded unchanged
+    float2 t0 : TEXCOORD0; // coordinate later used to sample RA01
+    float2 t1 : TEXCOORD1; // coordinate later used to sample RA02
+};
+
+struct VS_OUTPUT // value returned by vs_main; same fields as PS_INPUT
+{
+	float4 p : SV_Position; // copy of VS_INPUT.p
+	float2 t0 : TEXCOORD0; // copy of VS_INPUT.t0
+	float2 t1 : TEXCOORD1; // copy of VS_INPUT.t1
+};
+
+// Pass-through vertex shader: position and both texture coordinate sets reach the pixel shader unmodified.
+VS_OUTPUT vs_main(VS_INPUT input)
+{
+	VS_OUTPUT result;
+	result.t1 = input.t1;
+	result.t0 = input.t0;
+	result.p = input.p;
+
+	return result;
+}
+
+Texture2D RA01; // first input layer, sampled with t0 in ps_main
+Texture2D RA02; // second input layer, sampled with t1 in ps_main
+SamplerState Sampler; // sampler state shared by both layers
+
+cbuffer cb1 // merge parameters read by ps_main; NOTE(review): member order presumably has to match the host-side struct - confirm
+{
+	float4 BGColor; // replaces the second layer when SLBG is non-zero
+	float Alpha; // constant blend weight used when MMOD is non-zero
+	float EN1; // multiplier for the first layer and for the blend weight (0 disables the layer)
+	float EN2; // multiplier for the second layer (0 disables it)
+	int MMOD; // non-zero: blend with Alpha; zero: blend with the first layer's own alpha
+	int SLBG; // non-zero: use BGColor instead of sampling RA02
+};
+
+struct PS_INPUT // interpolated vs_main output as seen by ps_main
+{
+	float4 p : SV_Position; // pixel position; not read by ps_main
+	float2 t0 : TEXCOORD0; // coordinate used to sample RA01
+	float2 t1 : TEXCOORD1; // coordinate used to sample RA02
+};
+
+float4 ps_main(PS_INPUT input) : SV_Target0 // blends the two input layers into one output color
+{
+	float4 c0 = EN1 * RA01.Sample(Sampler, input.t0); // first layer, zeroed when EN1 is 0
+	float4 c1 = SLBG ? BGColor : EN2 * RA02.Sample(Sampler, input.t1); // second layer: BGColor if SLBG is set, else RA02 scaled by EN2
+	float a = EN1 * (MMOD ? Alpha : min(c0.a * 2, 1)); // blend weight: Alpha if MMOD is set, else doubled first-layer alpha capped at 1
+	return lerp(c1, c0, a); // a = 0 yields c1, a = 1 yields c0
+}
diff --git a/gsdx10/res/tfx.fx b/gsdx10/res/tfx.fx
new file mode 100644
index 0000000..c889033
--- /dev/null
+++ b/gsdx10/res/tfx.fx
@@ -0,0 +1,343 @@
+cbuffer cb0 // vertex transform constants read by vs_main
+{
+	float4 VertexScale; // per-component multiplier applied to the input position
+	float4 VertexOffset; // subtracted from the scaled position
+	float2 TextureScale; // multiplier applied to the input texture coordinate
+};
+
+struct VS_INPUT // vertex data read by vs_main
+{
+    float4 p : POSITION; // position; w is also forwarded as the texture coordinate divisor
+	float4 c : COLOR0; // vertex color, forwarded unchanged
+	float4 f : COLOR1; // only .a is read by vs_main; it becomes the fog blend factor (t.z)
+    float2 t : TEXCOORD0; // texture coordinate before TextureScale is applied
+};
+
+struct VS_OUTPUT // vs_main result; also the vertex type of every gs_main variant
+{
+	float4 p : SV_Position; // scaled and offset position
+	float4 c : COLOR0; // vertex color
+	float4 t : TEXCOORD0; // xy = scaled texture coordinate, z = fog factor, w = texture coordinate divisor
+};
+
+// Scales and offsets the input position and packs texture coordinate, fog factor and divisor into t for the later stages.
+VS_OUTPUT vs_main(VS_INPUT input)
+{
+	VS_OUTPUT output;
+	output.p = input.p * VertexScale - VertexOffset;
+	
+	output.c = input.c;
+	
+	output.t.xy = input.t.xy * TextureScale;
+	output.t.z = input.f.a; // fog factor, consumed by ps_main when FOG == 1
+	output.t.w = input.p.w < 0 ? 1 : input.p.w; // FIXME: <= takes small but not 0 numbers as 0
+	
+	return output;
+}
+
+#ifndef IIP // defaults, applied only when IIP has not been defined already
+#define IIP 0 // 0: gs_main copies color and t.z from the last vertex to the others (PRIM 1 and 2)
+#define PRIM 3 // gs_main variant: 0 point, 1 line, 2 triangle, 3 line expanded to a quad
+#endif
+	
+#if PRIM == 0
+
+[maxvertexcount(1)]
+void gs_main(point VS_OUTPUT input[1], inout PointStream<VS_OUTPUT> stream)
+{
+	stream.Append(input[0]); // point variant: the vertex passes through unchanged
+}
+
+#elif PRIM == 1
+
+// Line variant: emits both vertices; with IIP == 0 the first vertex takes color and t.z from the second one.
+[maxvertexcount(2)]
+void gs_main(line VS_OUTPUT input[2], inout LineStream<VS_OUTPUT> stream)
+{
+	#if IIP == 0
+	input[0].c = input[1].c;
+	input[0].t.z = input[1].t.z;
+	#endif
+	stream.Append(input[0]);
+	stream.Append(input[1]);
+}
+
+#elif PRIM == 2
+
+// Triangle variant: emits all three vertices; with IIP == 0 the first two take color and t.z from the third one.
+[maxvertexcount(3)]
+void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream<VS_OUTPUT> stream)
+{
+	#if IIP == 0
+	input[0].c = input[2].c;
+	input[0].t.z = input[2].t.z;
+	input[1].c = input[2].c;
+	input[1].t.z = input[2].t.z;
+	#endif
+	stream.Append(input[0]);
+	stream.Append(input[1]);
+	stream.Append(input[2]);
+}
+
+#elif PRIM == 3
+
+// Quad variant: expands a line whose endpoints are two opposite corners into a four-vertex triangle strip.
+[maxvertexcount(4)]
+void gs_main(line VS_OUTPUT input[2], inout TriangleStream<VS_OUTPUT> stream)
+{
+	input[0].p.z = input[1].p.z;
+	input[0].t.zw = input[1].t.zw;
+	// NOTE(review): input[0].c is left untouched, so its color can differ from the other three corners - confirm the caller supplies equal colors.
+	// lb: copy of input[1] with x (position and texture) taken from input[0]
+	VS_OUTPUT lb = input[1];
+	lb.p.x = input[0].p.x;
+	lb.t.x = input[0].t.x;
+	// rt: copy of input[1] with y (position and texture) taken from input[0]
+	VS_OUTPUT rt = input[1];
+	rt.p.y = input[0].p.y;
+	rt.t.y = input[0].t.y;
+	// Strip order input[0], lb, rt, input[1] yields the two triangles of the quad.
+	stream.Append(input[0]);
+	stream.Append(lb);
+	stream.Append(rt);
+	stream.Append(input[1]);
+}
+
+#endif
+
+Texture2D Texture; // texel source for every BPP path of ps_main
+Texture2D Palette; // color lookup table, sampled by ps_main only when BPP == 4
+SamplerState Sampler; // sampler state used for both Texture and Palette
+
+cbuffer cb1 // pixel shader constants read by ps_main
+{
+	float4 FogColor; // color blended in when FOG == 1 (only .rgb is read)
+	float2 ClampMin; // lower texture coordinate bound when CLAMP == 1
+	float2 ClampMax; // upper texture coordinate bound when CLAMP == 1
+	float TA0; // alpha given to texels without alpha (BPP 1) or with the alpha bit clear (BPP 2, 3)
+	float TA1; // alpha given to texels with the alpha bit set (BPP 2, 3)
+	float AREF; // reference value of the alpha test
+	float _pad; // not read by any shader in this file; presumably alignment padding - TODO confirm
+	float2 WH; // multiplied with tc before frac() to get bilinear weights; presumably texture width/height - TODO confirm
+	float2 rWrH; // offset of the fourth tap (BPP 3, 4); presumably (1/W, 1/H) - TODO confirm
+	float2 rWZ; // offset of the second tap (BPP 3, 4); presumably (1/W, 0) - TODO confirm
+	float2 ZrH; // offset of the third tap (BPP 3, 4); presumably (0, 1/H) - TODO confirm
+};
+
+struct PS_INPUT // interpolated geometry shader output as seen by ps_main
+{
+	float4 p : SV_Position; // pixel position; not read by ps_main
+	float4 c : COLOR0; // vertex color
+	float4 t : TEXCOORD0; // xy = texture coordinate, z = fog factor, w = divisor applied when FST == 0
+};
+
+struct PS_OUTPUT // the two colors written by ps_main
+{
+	float4 c0 : SV_Target0; // final color, alpha adjusted according to AOUT/FBA
+	float4 c1 : SV_Target1; // alpha * 2 replicated to all channels; ps_main marks it as used for alpha blending
+};
+
+#ifndef FST // defaults for all ps_main switches, applied only when FST has not been defined already
+#define FST 0 // 0: ps_main divides the texture coordinate by t.w
+#define CLAMP 0 // 1: clamp the texture coordinate to ClampMin..ClampMax
+#define BPP 0 // texel fetch path in ps_main (0..5)
+#define AEM 0 // non-zero: texels whose RGB is all zero get alpha 0 instead of TA0 (BPP 1-3)
+#define TFX 0 // texel/vertex-color combine mode in ps_main (0..3)
+#define TCC 1 // 1: texel alpha takes part in the combine (TFX 0, 2, 3)
+#define ATE 1 // 1: alpha test enabled
+#define ATST 2 // alpha test function (0..7), see ps_main
+#define FOG 0 // 1: blend the color with FogColor using t.z
+#define CLR1 0 // 1: force output RGB to 1
+#define FBA 0 // 1: force output alpha to at least 0.5
+#define AOUT 0 // 1: quantize output alpha to 0 or 0.5
+#endif
+
+float4 Normalize16(float4 f) // counterpart of Extract16
+{
+	return f / float4(0x001f, 0x03e0, 0x7c00, 0x8000); // divide each unshifted field by its own mask so a full field maps to 1.0
+}
+
+// Splits a packed 16-bit color into its four bit fields. Each field stays at its original bit position
+// (no shift), so the components only become comparable once Normalize16 divides by the same masks.
+float4 Extract16(uint i)
+{
+	uint red   = i & 0x001f; // bits 0-4
+	uint green = i & 0x03e0; // bits 5-9
+	uint blue  = i & 0x7c00; // bits 10-14
+	uint alpha = i & 0x8000; // bit 15
+
+	return float4(red, green, blue, alpha);
+}
+
+// Main pixel shader. Compile-time macros pick the variant: FST/CLAMP shape the texture coordinate, BPP/AEM the texel fetch,
+// TFX/TCC the texel-color combine, ATE/ATST the alpha test, FOG/CLR1 the color fix-ups and AOUT/FBA the output alpha.
+PS_OUTPUT ps_main(PS_INPUT input)
+{
+	float2 tc = input.t.xy;
+	// FST == 0: divide by t.w, which vs_main filled from the vertex w.
+	if(FST == 0)
+	{
+		tc /= input.t.w;
+	}
+	// CLAMP == 1: keep the coordinate inside [ClampMin, ClampMax].
+	if(CLAMP == 1)
+	{
+		tc = clamp(tc, ClampMin, ClampMax);
+	}
+	
+	// TODO: region repeat (PITA, would lose automatic bilinear then)
+	float4 t = 0; // zero default so variants that fetch nothing (BPP == 5, unknown BPP) never read an unassigned value
+	
+	if(BPP == 0) // 32
+	{
+		t = Texture.Sample(Sampler, tc);
+	}
+	else if(BPP == 1) // 24
+	{
+		t = Texture.Sample(Sampler, tc);
+		// No usable texel alpha: take TA0, or 0 when AEM is set and the texel RGB is all zero.
+		t.a = AEM == 0 || any(t.rgb) ? TA0 : 0;
+	}
+	else if(BPP == 2) // 16
+	{
+		t = Texture.Sample(Sampler, tc);
+		// Alpha >= 0.5 -> TA1; otherwise TA0, or 0 when AEM is set and the texel RGB is all zero.
+		t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
+	}
+	else if(BPP == 3) // 16P
+	{
+		// tc -= 0.5 * rWrH; // ?
+		// Four taps (tc, +rWZ, +ZrH, +rWrH) scaled by 65535 to integers; presumably the raw 16-bit texels of a 2x2 footprint - TODO confirm.
+		uint4 i = float4(
+			Texture.Sample(Sampler, tc).r,
+			Texture.Sample(Sampler, tc + rWZ).r,
+			Texture.Sample(Sampler, tc + ZrH).r,
+			Texture.Sample(Sampler, tc + rWrH).r) * 65535;
+			
+		float4 t00 = Extract16(i.x);
+		float4 t01 = Extract16(i.y);
+		float4 t10 = Extract16(i.z);
+		float4 t11 = Extract16(i.w);
+
+		float2 dd = frac(tc * WH); 
+		// Manual bilinear blend of the four unpacked texels, weighted by the fractional position dd.
+		t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
+		
+		t = Normalize16(t);
+		
+		t.a = t.a >= 0.5 ? TA1 : AEM == 0 || any(t.rgb) ? TA0 : 0; // a bit incompatible with up-scaling because the 1 bit alpha is interpolated
+	}
+	else if(BPP == 4) // 8HP / 32-bit palette
+	{
+		// tc -= 0.5 * rWrH; // ?
+		// Four taps of the alpha channel; each value is used below as the Palette coordinate.
+		float4 f = float4(
+			Texture.Sample(Sampler, tc).a,
+			Texture.Sample(Sampler, tc + rWZ).a,
+			Texture.Sample(Sampler, tc + ZrH).a,
+			Texture.Sample(Sampler, tc + rWrH).a);
+			
+		float4 t00 = Palette.Sample(Sampler, f.x);
+		float4 t01 = Palette.Sample(Sampler, f.y);
+		float4 t10 = Palette.Sample(Sampler, f.z);
+		float4 t11 = Palette.Sample(Sampler, f.w);
+		
+		float2 dd = frac(tc * WH);
+		// Blend after the palette lookup so colors are interpolated, not palette indices.
+		t = lerp(lerp(t00, t01, dd.x), lerp(t10, t11, dd.x), dd.y);
+	}
+	else if(BPP == 5) // 8HP / 16-bit palette
+	{
+		// TODO: yuck, just pre-convert the palette to 32-bit
+	}
+	
+	float4 c = input.c;
+	// Texture function: TFX selects how texel and vertex color combine, TCC whether texel alpha takes part.
+	if(TFX == 0)
+	{
+		if(TCC == 0) 
+		{
+			c.rgb = c.rgb * t.rgb * 2;
+		}
+		else
+		{
+			c = c * t * 2;
+		}
+	}
+	else if(TFX == 1)
+	{
+		c = t; // NOTE(review): TCC is ignored here, texel alpha always replaces vertex alpha - confirm intended
+	}
+	else if(TFX == 2)
+	{
+		c.rgb = c.rgb * t.rgb * 2 + c.a;
+		
+		if(TCC == 1) 
+		{
+			c.a += t.a;
+		}
+	}
+	else if(TFX == 3)
+	{
+		c.rgb = c.rgb * t.rgb * 2 + c.a;
+		
+		if(TCC == 1) 
+		{
+			c.a = t.a;
+		}
+	}
+
+	c = saturate(c);
+	
+	// TODO: alpha test hurts a lot
+	
+	if(ATE == 1)
+	{
+		if(ATST == 0) // never passes
+		{
+			discard;
+		}
+		else if(ATST == 2 || ATST == 3) // l, le
+		{
+			clip(AREF - c.a);
+		}
+		else if(ATST == 4) // e
+		{
+			clip(0.9f/256 - abs(c.a - AREF));
+		}
+		else if(ATST == 5 || ATST == 6) // ge, g
+		{
+			clip(c.a - AREF);
+		}
+		else if(ATST == 7) // ne
+		{
+			clip(abs(c.a - AREF) - 0.9f/256);
+		}
+	}
+
+	if(FOG == 1)
+	{
+		c.rgb = lerp(FogColor.rgb, c.rgb, input.t.z); // t.z = 1 keeps the color, t.z = 0 gives pure FogColor
+	}
+	
+	if(CLR1 == 1) // needed for Cd * (As/Ad/F + 1) blending modes
+	{
+		c.rgb = 1; 
+	}
+	
+	PS_OUTPUT output;
+	output.c1 = c.a * 2; // used for alpha blending
+	
+	if(AOUT == 1) // 16 bit output
+	{
+		c.a = FBA == 1 ? 0.5 : step(0.5, c.a) * 0.5;
+	}
+	else if(FBA == 1)
+	{
+		if(c.a < 0.5) c.a += 0.5;
+	}
+
+	output.c0 = c;
+	
+	return output;
+}
diff --git a/gsdx10/stdafx.cpp b/gsdx10/stdafx.cpp
new file mode 100644
index 0000000..aa3b996
--- /dev/null
+++ b/gsdx10/stdafx.cpp
@@ -0,0 +1,7 @@
+// stdafx.cpp : source file that includes just the standard includes
+// GSdx10.pch will be the pre-compiled header
+// stdafx.obj will contain the pre-compiled type information
+
+#include "stdafx.h"
+
+
diff --git a/gsdx10/stdafx.h b/gsdx10/stdafx.h
new file mode 100644
index 0000000..d7c9ad9
--- /dev/null
+++ b/gsdx10/stdafx.h
@@ -0,0 +1,64 @@
+// stdafx.h : include file for standard system include files,
+// or project specific include files that are used frequently, but
+// are changed infrequently
+
+#pragma once
+
+#pragma warning(disable: 4996) // C4996: silences MSVC deprecation warnings for the whole project
+
+#ifndef VC_EXTRALEAN
+#define VC_EXTRALEAN		// Exclude rarely-used stuff from Windows headers
+#endif
+
+// Minimum target versions for the Windows headers. Each macro is only defined here when the build has not set it already.
+// Refer to MSDN for the latest info on corresponding values for different platforms.
+#ifndef WINVER				// NOTE(review): 0x0510 is not a documented WINVER value (0x0500 = Windows 2000, 0x0501 = Windows XP) - confirm intent.
+#define WINVER 0x0510		// NOTE(review): also higher than _WIN32_WINNT below; the two usually match - confirm.
+#endif
+
+#ifndef _WIN32_WINNT		// Allow use of features specific to Windows NT 4 or later.
+#define _WIN32_WINNT 0x0400	// NOTE(review): this header includes d3d10.h, which presumably needs a newer target than NT 4 - confirm.
+#endif						
+
+#ifndef _WIN32_WINDOWS		// Allow use of features specific to Windows 98 or later.
+#define _WIN32_WINDOWS 0x0410 // Change this to the appropriate value to target Windows Me or later.
+#endif
+
+#ifndef _WIN32_IE			// Allow use of features specific to IE 4.0 or later.
+#define _WIN32_IE 0x0400	// Change this to the appropriate value to target IE 5.0 or later.
+#endif
+
+#define _ATL_CSTRING_EXPLICIT_CONSTRUCTORS	// some CString constructors will be explicit
+
+#include <afxwin.h>         // MFC core and standard components
+#include <afxext.h>         // MFC extensions
+
+#ifndef _AFX_NO_AFXCMN_SUPPORT
+#include <afxcmn.h>			// MFC support for Windows Common Controls
+#endif // _AFX_NO_AFXCMN_SUPPORT
+
+#include <afxmt.h>
+#include <atlbase.h>
+#include <atlcoll.h>
+#include <atlpath.h>
+#include <d3d10.h>
+#include <d3dx10.h>
+#include <xmmintrin.h>
+#include <emmintrin.h>
+
+#include "../GSdx/GSState.h"
+
+#define countof(a) (sizeof(a)/sizeof(a[0])) // element count of a true array; gives a meaningless result for a pointer
+
+#ifndef RESTRICT // no-alias pointer qualifier, spelled per compiler unless the build already defined it
+	#ifdef __INTEL_COMPILER
+		#define RESTRICT restrict
+	#elif _MSC_VER >= 1400
+		#define RESTRICT __restrict
+	#else
+		#define RESTRICT // expands to nothing on every other compiler
+	#endif
+#endif
+
+#define EXPORT_C extern "C" __declspec(dllexport) void __stdcall // declaration prefix for exported C-linkage __stdcall functions returning void
+#define EXPORT_C_(type) extern "C" __declspec(dllexport) type __stdcall // same prefix with a caller-chosen return type
\ No newline at end of file
diff --git a/release.vsprops b/release.vsprops
new file mode 100644
index 0000000..5cc4030
--- /dev/null
+++ b/release.vsprops
@@ -0,0 +1,25 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioPropertySheet
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="release"
+	>
+	<Tool
+		Name="VCCLCompilerTool"
+		Optimization="2"
+		InlineFunctionExpansion="2"
+		EnableIntrinsicFunctions="true"
+		FavorSizeOrSpeed="1"
+		OmitFramePointers="true"
+		WholeProgramOptimization="true"
+		PreprocessorDefinitions="NDEBUG"
+		StringPooling="true"
+		BufferSecurityCheck="false"
+	/>
+	<Tool
+		Name="VCLinkerTool"
+		LinkIncremental="1"
+		OptimizeReferences="2"
+		EnableCOMDATFolding="2"
+	/>
+</VisualStudioPropertySheet>
diff --git a/sse2.vsprops b/sse2.vsprops
new file mode 100644
index 0000000..9ba4ff3
--- /dev/null
+++ b/sse2.vsprops
@@ -0,0 +1,11 @@
+<?xml version="1.0" encoding="windows-1250"?>
+<VisualStudioPropertySheet
+	ProjectType="Visual C++"
+	Version="8.00"
+	Name="sse2"
+	>
+	<Tool
+		Name="VCCLCompilerTool"
+		EnableEnhancedInstructionSet="2"
+	/>
+</VisualStudioPropertySheet>